0.1 Time series analysis

0.1.1 Estimator properties and approaches to time series

### VISUALIZING DATA###


##REQUIRED DATA FILE: PESenergy.csv. Available from: http://dx.doi.org/10.7910/DVN/ARKOTI

#clean up
rm(list=ls())

##SECTION 3.1: UNIVARIATE GRAPHS IN THE base PACKAGE##
#load energy policy coverage data
pres.energy<-read.csv("PESenergy.csv")
#pres.energy<-read.csv("http://j.mp/PRESenergy")

#draw a histogram of TV coverage
hist(pres.energy$Energy,xlab="Television Stories",main="")
abline(h=0,col='gray60')
box()

#box-and-whisker plot of TV coverage
boxplot(pres.energy$Energy,ylab="Television Stories")

#box-and-whisker plots before and after Nixon speech
pres.energy$post.nixon<-c(rep(0,58),rep(1,122))
boxplot(pres.energy$Energy~pres.energy$post.nixon,
        axes=F,ylab="Television Stories")
axis(1,at=c(1,2),labels=c('Before Nov. 1973','After Nov. 1973'))
axis(2)
box()

#SCATTERPLOT#
#quick and dirty
plot(y=pres.energy$Energy,x=pres.energy$oilc)

#beautified
plot(y=pres.energy$Energy,x=pres.energy$oilc,
     xlab="Oil Price",ylab="Energy Coverage")
abline(lm(Energy~oilc,data=pres.energy))

#SECTION 3.2.1: LINE GRAPHS WITH plot#
#line plot of energy coverage by month
plot(x=pres.energy$Energy,type="l",axes=F,
     xlab='Month', ylab='Television Stories on Energy')
axis(1,at=c(1,37,73,109,145),
     labels=c('Jan. 1969','Jan. 1972','Jan. 1975','Jan. 1978','Jan. 1981'),
     cex.axis=.7)
axis(2)
abline(h=0,col='gray60')
box()

#alternative version of the line plot of monthly energy coverage
pres.energy$Time<-1:180
plot(y=pres.energy$Energy,x=pres.energy$Time,type="l")

#line plot of oil price per barrel by month
plot(x=pres.energy$oilc,type='l',axes=F,xlab='Month',ylab='Cost of Oil')
axis(1,at=c(1,37,73,109,145),
     labels=c('Jan. 1969','Jan. 1972','Jan. 1975','Jan. 1978','Jan. 1981'),
     cex.axis=.7)
axis(2)
box()

######################################################################
# simulate trending series

#set the sample size
n <- 300

#create a time index
t <- c(1:300)

#generate a disturbance term for x
delta <- rnorm(n)

#generate the variable x
x <- .1*t + delta

#generate a disturbance term for y
epsilon <- rnorm(n)

#generate the variable y
y <- .5*t + epsilon

#regress y on x
model.1 <- lm(y~x)

#view the results
summary(model.1)
## 
## Call:
## lm(formula = y ~ x)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -17.8203  -3.2465   0.2446   3.5211  15.9706 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.47275    0.57678   4.287 2.45e-05 ***
## x            4.84700    0.03305 146.675  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.078 on 298 degrees of freedom
## Multiple R-squared:  0.9863, Adjusted R-squared:  0.9863 
## F-statistic: 2.151e+04 on 1 and 298 DF,  p-value: < 2.2e-16
#### quick and dirty way to get some confidence intervals 
#install.packages("MBESS") #installation, only necessary once per machine
library(MBESS)

#the following command expects: 
#coefficient estimate, standard error, sample size, number of predictors
my.se<-summary(model.1)$sigma*sqrt(summary(model.1)$cov.unscaled[2,2])
my.se
## [1] 0.03304578
MBESS::ci.reg.coef(b.j=model.1$coefficients[2], SE.b.j=my.se, N=300, p=1)
## [1] "95 percent CI limits (with corresponding probability) around the jth population regression coefficient calculated using the (central) t-distribution with 298 degrees of freedom follow."
## $Lower.Limit.for.beta.j
##        x 
## 4.781967 
## 
## $Prob.Less.Lower
## [1] 0.025
## 
## $Upper.Limit.for.beta.j
##        x 
## 4.912032 
## 
## $Prob.Greater.Upper
## [1] 0.025
######################################################################
#differencing series

#set the sample size
n <- 300

#create a time index
t <- c(1:300)

#generate a disturbance term for x
delta <- rnorm(n)

#generate the variable x
x <- .1*t + delta

#generate a disturbance term for y
epsilon <- rnorm(n)

#generate the variable y
y <- .5*t + .5*x + epsilon

#regress y on x
model.1 <- lm(y~x)
summary(model.1)
## 
## Call:
## lm(formula = y ~ x)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -15.2596  -3.4637   0.1056   3.0085  13.5817 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.51811    0.59892   2.535   0.0118 *  
## x            5.39431    0.03433 157.143   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.231 on 298 degrees of freedom
## Multiple R-squared:  0.9881, Adjusted R-squared:  0.988 
## F-statistic: 2.469e+04 on 1 and 298 DF,  p-value: < 2.2e-16
#Install a package
#install.packages("timeSeries")
library(timeSeries)
## Loading required package: timeDate
#difference the series
d.x <- diff(x)
d.y <- diff(y)
d.t <- c(1:299)

#plot the series
plot(x=d.t,y=d.x,type='l')
lines(x=d.t,y=d.y,col='blue',lty=2)

#regress y on x
model.2 <- lm(d.y~d.x)
summary(model.2)
## 
## Call:
## lm(formula = d.y ~ d.x)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.0828 -1.0053 -0.0361  1.0490  3.7437 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.50267    0.08715   5.768 2.01e-08 ***
## d.x          0.46140    0.05837   7.905 5.30e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.503 on 297 degrees of freedom
## Multiple R-squared:  0.1738, Adjusted R-squared:  0.171 
## F-statistic: 62.49 on 1 and 297 DF,  p-value: 5.295e-14
######################################################################

0.1.2 ARIMA Models

knitr::include_graphics("Table6-1.pdf") 
ARIMA

ARIMA

###CHAPTER 9: TIME SERIES ANALYSIS###
###POLITICAL ANALYSIS USING R, BY JAMIE MONOGAN###

##REQUIRED DATA FILE: PESenergy.csv 

#clean up
rm(list=ls())

##SECTION 9.1: THE BOX-JENKINS METHOD##
#load data
#pres.energy<-read.csv("http://j.mp/PRESenergy")
pres.energy<-read.csv("PESenergy.csv")

#autocorrelation and partial autocorrelation functions
acf(pres.energy$Energy,lag.max=24)

pacf(pres.energy$Energy,lag.max=24)

#estimate ARIMA model
ar1.mod<-arima(pres.energy$Energy,order=c(1,0,0))
ar1.mod
## 
## Call:
## arima(x = pres.energy$Energy, order = c(1, 0, 0))
## 
## Coefficients:
##          ar1  intercept
##       0.8235    32.9020
## s.e.  0.0416     9.2403
## 
## sigma^2 estimated as 502.7:  log likelihood = -815.77,  aic = 1637.55
#diagnose model
tsdiag(ar1.mod,24)

#autocorrelation and partial autocorrelation functions on residuals
acf(ar1.mod$residuals,lag.max=24)

pacf(ar1.mod$residuals,lag.max=24)

#Ljung-Box Q test
Box.test(ar1.mod$residuals,lag=24,type='Ljung-Box')
## 
##  Box-Ljung test
## 
## data:  ar1.mod$residuals
## X-squared = 20.112, df = 24, p-value = 0.6904
######################################################################
#SECTION 9.1.1: TRANSFER FUNCTIONS VERSUS STATIC MODELS#
#Energy coverage examples showing static regression and intervention analysis
#static regression model with ARIMA error process
predictors<-as.matrix(subset(pres.energy,
                      select=c(rmn1173,grf0175,grf575,jec477,
                               jec1177,jec479,embargo,hostages,
                               oilc,Approval,Unemploy)))
static.mod<-stats::arima(pres.energy$Energy, 
                  order=c(1,0,0), 
                  xreg=predictors)
static.mod
## 
## Call:
## stats::arima(x = pres.energy$Energy, order = c(1, 0, 0), xreg = predictors)
## 
## Coefficients:
##          ar1  intercept  rmn1173  grf0175   grf575   jec477  jec1177
##       0.8222     5.8822  91.3265  31.8761  -8.2280  29.6446  -6.6967
## s.e.  0.0481    52.9008  15.0884  15.4643  15.2025  15.0831  15.0844
##         jec479  embargo  hostages    oilc  Approval  Unemploy
##       -20.1624  35.3247  -16.5001  0.8855   -0.2479    1.0080
## s.e.   15.2238  15.1200   13.7619  1.0192    0.2816    3.8909
## 
## sigma^2 estimated as 379.3:  log likelihood = -790.42,  aic = 1608.84
#load package
#install.packages('TSA')
library(TSA)
## 
## Attaching package: 'TSA'
## The following objects are masked from 'package:timeDate':
## 
##     kurtosis, skewness
## The following objects are masked from 'package:stats':
## 
##     acf, arima
## The following object is masked from 'package:utils':
## 
##     tar

#estimate transfer function
dynamic.mod<-TSA::arimax(pres.energy$Energy,order=c(1,0,0),
                    xreg=predictors[,-1],
                    xtransf=predictors[,1],
                    transfer=list(c(1,0)))
dynamic.mod
## 
## Call:
## TSA::arimax(x = pres.energy$Energy, order = c(1, 0, 0), xreg = predictors[, 
##     -1], xtransf = predictors[, 1], transfer = list(c(1, 0)))
## 
## Coefficients:
##          ar1  intercept  grf0175   grf575   jec477  jec1177    jec479
##       0.8262    20.2787  31.5282  -7.9725  29.9820  -6.3304  -19.8179
## s.e.  0.0476    46.6870  13.8530  13.6104  13.5013  13.5011   13.6345
##       embargo  hostages    oilc  Approval  Unemploy  T1-AR1    T1-MA0
##       25.9388  -16.9015  0.5927   -0.2074    0.1660  0.6087  160.6241
## s.e.  13.2305   12.4422  0.9205    0.2495    3.5472  0.0230   17.0388
## 
## sigma^2 estimated as 305.1:  log likelihood = -770.83,  aic = 1569.66
#plot the dynamic effect of the intervention over the raw series
months<-c(1:180)
y.pred<-dynamic.mod$coef[2:12]%*%c(1,predictors[58,-1])+
  160.6241*predictors[,1]+160.6241*(.6087^(months-59))*as.numeric(months>59)
## Warning in dynamic.mod$coef[2:12] %*% c(1, predictors[58, -1]) + 160.6241 * : Recycling array of length 1 in array-vector arithmetic is deprecated.
##   Use c() or as.vector() instead.
plot(y=pres.energy$Energy,x=months,
     xlab="Month",ylab="Energy Policy Stories",
     type="l",axes=F)
axis(1,at=c(1,37,73,109,145),
     labels=c('Jan. 1969','Jan. 1972','Jan. 1975','Jan. 1978','Jan. 1981'))
axis(2)
box()     
lines(y=y.pred,x=months,lty=2,col='blue',lwd=2)

#plot predicted values of the series with a line, with true values as points
months<-c(1:180)
full.pred<-pres.energy$Energy-dynamic.mod$residuals
plot(y=full.pred,x=months,
     xlab="Month",ylab="Energy Policy Stories",
     type="l",ylim=c(0,225),axes=F)
points(y=pres.energy$Energy,x=months,pch=20)
legend(x=0,y=200,legend=c("Predicted","True"),pch=c(NA,20),lty=c(1,NA))
axis(1,at=c(1,37,73,109,145),
     labels=c('Jan. 1969','Jan. 1972','Jan. 1975','Jan. 1978','Jan. 1981'))
axis(2)
box()  

######################################################################


#### ESTIMATING A SEASONAL ARIMA MODEL ###
#front matter
rm(list=ls())
library(TSA)

#load data
data(co2)
#write.csv(co2,"co2.csv")
plot(co2)

#first difference model
co2.1 <- arima(co2, order=c(0,1,0))
plot(co2.1$residuals)

acf(co2.1$residuals,24)

pacf(co2.1$residuals,24)

#seasonal difference model
co2.2 <- TSA::arima(co2, order=c(0,1,0), 
                    seasonal=list(order=c(0,1,0), period=12))
plot(co2.2$residuals)

acf(co2.2$residuals,24)

pacf(co2.2$residuals,24)

#Moving Average Components for year and season
co2.3 <- TSA::arima(co2, order=c(0,1,1), 
               seasonal=list(order=c(0,1,1), period=12))

acf(co2.3$residuals,24)

pacf(co2.3$residuals,24)

Box.test(co2.3$residuals,24,"Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  co2.3$residuals
## X-squared = 25.891, df = 24, p-value = 0.3587
co2.3
## 
## Call:
## TSA::arima(x = co2, order = c(0, 1, 1), seasonal = list(order = c(0, 1, 1), 
##     period = 12))
## 
## Coefficients:
##           ma1     sma1
##       -0.5792  -0.8206
## s.e.   0.0791   0.1137
## 
## sigma^2 estimated as 0.5446:  log likelihood = -139.54,  aic = 283.08
######################################################################

0.1.3 Intervention Analysis and Forecasting

### FORECASTING WITH AN ARIMA MODEL ###
#front matter
rm(list=ls())
library(foreign)
library(TSA)

#Input Data
#data <- read.dta(file.choose())
data <- read.dta("HOME2X7.DTA")
#data<-read.dta("//spia.uga.edu/faculty_pages/monogan/teaching/ts/HOME2X7.DTA")

#Bind Two or More Time Series
data <- ts.union(data)

#Diagnose series 2
#Auto- and Cross- Covariance and -Correlation Function Estimation
acf(data$z2,20)

pacf(data$z2,20)

mod.z2 <- TSA::arima(data$z2, order=c(1,0,0))

acf(mod.z2$residuals,20)

pacf(mod.z2$residuals,20)

#Box-Pierce and Ljung-Box Tests
Box.test(mod.z2$residuals,20,"Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  mod.z2$residuals
## X-squared = 14.199, df = 20, p-value = 0.8202
#Projections with mod.z2
plot(mod.z2, n.ahead=50, type='l')

######################################################################
#Bush approval example in R

#load data & view series
#bush <- read.dta(file.choose())#BUSHJOB.DTA
bush <- read.dta("BUSHJOB.DTA")
#bush <- read.dta("//spia.uga.edu/faculty_pages/monogan/teaching/ts/BUSHJOB.DTA")
plot(y=bush$approve, x=bush$t, type='l')

#identify arima process
acf(bush$approve,20)

pacf(bush$approve,20)

#Estimate AR(1) model. Using a bit of theory to justify AR(1).
mod.1 <- arima(bush$approve, order=c(1,0,0))
mod.1
## 
## Call:
## arima(x = bush$approve, order = c(1, 0, 0))
## 
## Coefficients:
##          ar1  intercept
##       0.9145    56.3717
## s.e.  0.0529     6.4375
## 
## sigma^2 estimated as 20.01:  log likelihood = -146.76,  aic = 297.52
#diagnose arima model
acf(mod.1$residuals,20)

pacf(mod.1$residuals,20)

Box.test(mod.1$residuals,20,"Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  mod.1$residuals
## X-squared = 13.303, df = 20, p-value = 0.864
#estimate intervention analysis for september 11 (remember to start with a pulse)
mod.2b <- arimax(bush$approve, order=c(1,0,0), 
                 xtransf=bush$s11, transfer=list(c(1,0)))
mod.2b
## 
## Call:
## arimax(x = bush$approve, order = c(1, 0, 0), xtransf = bush$s11, transfer = list(c(1, 
##     0)))
## 
## Coefficients:
##          ar1  intercept  T1-AR1   T1-MA0
##       0.8562    56.0327  0.8984  27.6660
## s.e.  0.2028     5.7056  0.0197   4.6373
## 
## sigma^2 estimated as 8.997:  log likelihood = -126.53,  aic = 261.06
#Notes: the second parameter is the numerator term. 
#If 0 only, then concurrent effect only. 
#The first parameter affects the denominator. 
#c(0,0) replicates the effect of just doing "xreg"
#Our parameter estimates look good, 
#no need to drop delta or switch to a step function.

#Graph the intervention model
y.pred <- 56.0327 + 27.6660*bush$s11 + 27.6660*(0.8984^(bush$t-9))*as.numeric(bush$t>9)
plot(y=bush$approve, x=bush$t, type='l')
lines(y=y.pred, x=bush$t, lty=2)

#We also can combine the AR(1) and intervention features into forecasts. 
#Do this SECOND, though:
full.pred<-bush$approve-mod.2b$residuals
plot(y=bush$approve, x=bush$t)
lines(y=full.pred, x=bush$t)

#expand into the onset of the war in Iraq
#note: the upward movement actually starts to happen one lag out
#simplest specification:
mod.3 <- arimax(bush$approve, order=c(1,0,0), 
                xtransf=cbind(bush$s11,bush$iraq), 
                transfer=list(c(1,0),c(1,0)))
mod.3
## 
## Call:
## arimax(x = bush$approve, order = c(1, 0, 0), xtransf = cbind(bush$s11, bush$iraq), 
##     transfer = list(c(1, 0), c(1, 0)))
## 
## Coefficients:
##          ar1  intercept  T1-AR1   T1-MA0  T2-AR1  T2-MA0
##       0.8526    56.1190  0.9025  26.0329  0.7592  3.9335
## s.e.  0.2017     6.1802  0.0193   4.6164  0.2114  3.0440
## 
## sigma^2 estimated as 8.509:  log likelihood = -125.12,  aic = 262.25
#Graph the new intervention model
y.pred <- 56.1190+ 26.0329*bush$s11 + 
  26.0329*(0.9025^(bush$t-9))*as.numeric(bush$t>9) + 
  3.9335*bush$iraq + 3.9335*(0.7592^(bush$t-27))*as.numeric(bush$t>27)
plot(y=bush$approve, x=bush$t, type='l')
lines(y=y.pred, x=bush$t, lty=2)

######################################################################

0.1.4 Transfer Functions

#exploring the 'ccf' function. Also, how to lag variables in R.
#clean up
rm(list=ls())

#The 'ccf' function allows a simple way to get cross-correlations.
#Oddly, 'x' refers to the presumed endogenous variable 
#and 'y' refers to the presumed exogenous variable.
#This is opposite of what is usually expected.
#Here is a Monte Carlo simulation describing how this works.
a<-rnorm(1000)
b<-rep(NA,1000)
b[1]<-0
for (i in 2:1000) b[i]<-.5*a[i-1]+rnorm(1)

#view our simulations
plot(a,type='l')
lines(b,lty=2,col='red')

#By the truth, lags of a should predict values of b.
#To get what we normally want from a CCF, 
#x is your endogenous variable and y is your exogenous
ccf(x=a,y=b,lag.max=5,
    xlab="Negative means x precedes y. Positive means y precedes x.")

ccf(x=b,y=a,lag.max=5,
    xlab="Negative means x precedes y. Positive means y precedes x.")

###ADDITIONAL ILLUSTRATION###
#Let's look at some cross-correlations of Greek tourism and terrorist attacks.
#We'll compare 'ccf' to Pearson correlation computation.

#Load data
#https://spia.uga.edu/faculty_pages/monogan/teaching/ts/italy.csv
data <- read.csv('italy.csv')

#View data
plot(data$GRSHARE, type='l')

plot(data$ATTKGR, type="l")

#CCF of unfiltered variables
ccf.output<-ccf(y=data$ATTKGR, x=data$GRSHARE, 12,
                xlab="Negative means x precedes y. Positive means y precedes x."); ccf.output

## 
## Autocorrelations of series 'X', by lag
## 
##    -12    -11    -10     -9     -8     -7     -6     -5     -4     -3 
## -0.046  0.136  0.064 -0.201 -0.095  0.133  0.159 -0.133 -0.099  0.229 
##     -2     -1      0      1      2      3      4      5      6      7 
##  0.227 -0.114 -0.085  0.184  0.129 -0.165 -0.106  0.162  0.100 -0.179 
##      8      9     10     11     12 
## -0.214  0.083  0.022 -0.181 -0.245
tour<-ts(data$GRSHARE)
terr<-ts(data$ATTKGR)
l.terr<-lag(terr,-1)
l2.terr<-lag(terr,-2)
l.tour<-lag(tour,-1)
l2.tour<-lag(tour,-2)
data.2<-na.omit(as.data.frame(ts.union(tour,terr,l.terr,l2.terr,l.tour,l2.tour)))

#compare
cor(data.2$tour,data.2$l.terr);ccf.output[1]
## [1] 0.1819211
## 
## Autocorrelations of series 'X', by lag
## 
##     1 
## 0.184
cor(data.2$tour,data.2$l2.terr);ccf.output[2]
## [1] 0.1324894
## 
## Autocorrelations of series 'X', by lag
## 
##     2 
## 0.129
cor(data.2$terr,data.2$l.tour);ccf.output[-1]
## [1] -0.1412183
## 
## Autocorrelations of series 'X', by lag
## 
##     -1 
## -0.114
cor(data.2$terr,data.2$l2.tour);ccf.output[-2]
## [1] 0.2313133
## 
## Autocorrelations of series 'X', by lag
## 
##    -2 
## 0.227
###################################################################

##TRANSFER FUNCTION EXAMPLE--right track example##
#Is the country on the right track?

#data<-read.csv(file.choose(), header=T) #RIGHTTRK.csv
data<-read.csv("RIGHTTRK.csv",header=T)

#view the data
plot(y=data$righttrk, x=data$time, type='l')
par(new=T)
plot(y=data$Employ, x=data$time, 
     type='l',lty=2, xlab="", ylab="",axes=F)
axis(4)

#identify ARIMA models
ccf(y=data$Employ, x=data$righttrk,20,
    xlab="Negative means x precedes y. Positive means y precedes x.")

acf(data$Employ,20)

pacf(data$Employ,20)

acf(data$righttrk,20)

pacf(data$righttrk,20)

#estimate ARIMA models
mod.righttrk <- arima(data$righttrk, order=c(1,0,0)); mod.righttrk
## 
## Call:
## arima(x = data$righttrk, order = c(1, 0, 0))
## 
## Coefficients:
##          ar1  intercept
##       0.8478    54.9556
## s.e.  0.0667     3.1035
## 
## sigma^2 estimated as 17.71:  log likelihood = -209.13,  aic = 422.26
mod.Employ <- arima(data$Employ, order=c(1,0,0)); mod.Employ
## 
## Call:
## arima(x = data$Employ, order = c(1, 0, 0))
## 
## Coefficients:
##          ar1  intercept
##       0.9753     94.874
## s.e.  0.0202      0.500
## 
## sigma^2 estimated as 0.02077:  log likelihood = 36.33,  aic = -68.65
#diagnose ARIMA models
acf(mod.righttrk$residuals, 26)

pacf(mod.righttrk$residuals, 26)

Box.test(mod.righttrk$residuals, 26, "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  mod.righttrk$residuals
## X-squared = 33.907, df = 26, p-value = 0.1374
acf(mod.Employ$residuals, 26)

pacf(mod.Employ$residuals, 26)

Box.test(mod.Employ$residuals, 26, "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  mod.Employ$residuals
## X-squared = 22.368, df = 26, p-value = 0.6684
#identify a transfer function
ccf(y=mod.Employ$residuals, x=mod.righttrk$residuals,20,
    xlab="Negative means x precedes y. Positive means y precedes x.")

#Perhaps an effect at lag 8 of employment onto right track?
employ<-ts(data$Employ)
l8.employ<-lag(employ,-8)
track<-ts(data$righttrk)
data.2<-na.omit(as.data.frame(ts.union(employ,l8.employ,track)))
head(data.2)
##    employ l8.employ  track
## 9    95.1      94.7 52.246
## 10   95.3      94.8 52.270
## 11   95.4      94.8 51.528
## 12   95.3      94.9 53.929
## 13   95.4      95.1 61.740
## 14   95.4      95.0 64.358
tf.1 <- arimax(data.2$track, order=c(1,0,0), 
               xtransf=data.2$l8.employ, transfer=list(c(1,0)))
tf.1
## 
## Call:
## arimax(x = data.2$track, order = c(1, 0, 0), xtransf = data.2$l8.employ, transfer = list(c(1, 
##     0)))
## 
## Coefficients:
##          ar1  intercept   T1-AR1   T1-MA0
##       0.8440    56.7398  -0.3258  -0.0157
## s.e.  0.0722    13.4663   1.7226   0.1663
## 
## sigma^2 estimated as 18.72:  log likelihood = -188.07,  aic = 384.14
tf.2 <- arimax(data.2$track, order=c(1,0,0), 
               xtransf=data.2$l8.employ, transfer=list(c(1,1)))
## Warning in arimax(data.2$track, order = c(1, 0, 0), xtransf = data.
## 2$l8.employ, : possible convergence problem: optim gave code=1
tf.2
## 
## Call:
## arimax(x = data.2$track, order = c(1, 0, 0), xtransf = data.2$l8.employ, transfer = list(c(1, 
##     1)))
## 
## Coefficients:
##          ar1  intercept   T1-AR1   T1-MA0  T1-MA1
##       0.8484    34.6533  -0.0011  -3.7618  3.9804
## s.e.  0.1902  1111.5539   0.0122   6.4995  6.5469
## 
## sigma^2 estimated as 18.47:  log likelihood = -184.77,  aic = 379.53
tf.3 <- arimax(data.2$track, order=c(1,0,0), 
               xtransf=data.2$l8.employ, transfer=list(c(0,1)))
tf.3
## 
## Call:
## arimax(x = data.2$track, order = c(1, 0, 0), xtransf = data.2$l8.employ, transfer = list(c(0, 
##     1)))
## 
## Coefficients:
##          ar1  intercept   T1-MA0  T1-MA1
##       0.8426    43.5912  -3.6493  3.7770
## s.e.  0.1790  1058.4958   6.2860  6.2499
## 
## sigma^2 estimated as 18.51:  log likelihood = -184.82,  aic = 377.63
#Diagnose our results from the 8-lag model
acf(tf.3$residuals[-1],26)

pacf(tf.3$residuals[-1],26)

Box.test(tf.3$residuals[-1], 26, "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  tf.3$residuals[-1]
## X-squared = 39.422, df = 26, p-value = 0.04441
ccf(y=mod.Employ$residuals[-(1:8)], x=tf.3$residuals[-1], 20,
    xlab="Negative means x precedes y. Positive means y precedes x.")

############################################################

0.1.5 Regression Models for Dynamic Causation

#front matter
rm(list=ls())
#install.packages("lmtest")
#install.packages("dlnm")
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following object is masked from 'package:timeSeries':
## 
##     time<-
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(dlnm)
## This is dlnm 2.3.6. For details: help(dlnm) and vignette('dlnmOverview').
library(foreign)
library(orcutt) 


#load data and create lag structure
bush<-read.dta("BUSHJOB.DTA")

t.s11<-ts(bush$s11)
t.iraq<-ts(bush$iraq)
t.approve<-ts(bush$approve)
lag.approve<-lag(t.approve,-1)

bush.2<-ts.union(t.s11,t.iraq,t.approve,lag.approve)

#run OLS models with and without a lagged DV (static and Koyck)
mod.no.lag<-lm(approve~s11+iraq,data=bush)
summary(mod.no.lag)
## 
## Call:
## lm(formula = approve ~ s11 + iraq, data = bush)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -14.273  -8.627  -3.345   5.477  24.913 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   59.692      1.564  38.166   <2e-16 ***
## s11           17.825     10.948   1.628    0.110    
## iraq           2.542     10.948   0.232    0.817    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.84 on 47 degrees of freedom
## Multiple R-squared:  0.05414,    Adjusted R-squared:  0.01389 
## F-statistic: 1.345 on 2 and 47 DF,  p-value: 0.2703
mod.lag<-lm(t.approve~lag.approve+t.s11+t.iraq,data=bush.2)
summary(mod.lag)
## 
## Call:
## lm(formula = t.approve ~ lag.approve + t.s11 + t.iraq, data = bush.2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.3815 -1.6173 -0.2157  1.0934  9.8784 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.67096    2.40748   1.525    0.134    
## lag.approve  0.93105    0.03916  23.778  < 2e-16 ***
## t.s11       24.16205    2.99376   8.071 2.72e-10 ***
## t.iraq       4.17758    2.98182   1.401    0.168    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.949 on 45 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.9303, Adjusted R-squared:  0.9256 
## F-statistic: 200.1 on 3 and 45 DF,  p-value: < 2.2e-16
#run Durbin-Watson and Breusch-Godfried tests
dwtest(mod.no.lag)
## 
##  Durbin-Watson test
## 
## data:  mod.no.lag
## DW = 0.1932, p-value < 2.2e-16
## alternative hypothesis: true autocorrelation is greater than 0
bgtest(mod.no.lag)
## 
##  Breusch-Godfrey test for serial correlation of order up to 1
## 
## data:  mod.no.lag
## LM test = 39.419, df = 1, p-value = 3.42e-10
bgtest(mod.lag)
## 
##  Breusch-Godfrey test for serial correlation of order up to 1
## 
## data:  mod.lag
## LM test = 0.22139, df = 1, p-value = 0.638
#run Cochrane-Orcutt
mod.fgls <- cochrane.orcutt(mod.no.lag)
summary(mod.fgls)
## Call:
## lm(formula = approve ~ s11 + iraq, data = bush)
## 
##             Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)  61.1730     6.7836   9.018 9.802e-12 ***
## s11           8.7648     3.1380   2.793  0.007582 ** 
## iraq         -1.1825     3.1380  -0.377  0.708017    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.2439 on 46 degrees of freedom
## Multiple R-squared:  0.1473 ,  Adjusted R-squared:  0.1102
## F-statistic: 4 on 2 and 46 DF,  p-value: < 2.563e-02
## 
## Durbin-Watson statistic 
## (original):    0.19320 , p-value: 7.342e-21
## (transformed): 1.27699 , p-value: 7.012e-03
#The results differ. In this case, 
#I'd say that pulse inputs probably require an LDV 
#for the full effect to enter the model.

####DYNAMICS####
#Unrestricted distributed lag model
lag.x<-lag(t.s11,-1)
lag2.x<-lag(t.s11,-2)
lag3.x<-lag(t.s11,-3)
lag4.x<-lag(t.s11,-4)
lag5.x<-lag(t.s11,-5)
bush.3<-ts.union(t.s11,t.iraq,t.approve,lag.approve,
                 lag.x,lag2.x,lag3.x,lag4.x,lag5.x)

mod.unrestricted <-lm(t.approve~t.s11+lag.x+lag2.x+lag3.x+
                        lag4.x+lag5.x,data=bush.3)
summary(mod.unrestricted)
## 
## Call:
## lm(formula = t.approve ~ t.s11 + lag.x + lag2.x + lag3.x + lag4.x + 
##     lag5.x, data = bush.3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.3096  -6.9172  -0.2124   5.4341  18.9708 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   57.613      1.327  43.404  < 2e-16 ***
## t.s11         19.903      8.395   2.371  0.02292 *  
## lag.x         26.594      8.395   3.168  0.00303 ** 
## lag2.x        26.992      8.395   3.215  0.00266 ** 
## lag3.x        24.977      8.395   2.975  0.00507 ** 
## lag4.x        22.382      8.395   2.666  0.01121 *  
## lag5.x        20.424      8.395   2.433  0.01980 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.289 on 38 degrees of freedom
##   (10 observations deleted due to missingness)
## Multiple R-squared:  0.5288, Adjusted R-squared:  0.4544 
## F-statistic: 7.108 on 6 and 38 DF,  p-value: 3.965e-05
mod.koyck<-lm(t.approve~lag.approve+t.s11,data=bush.2)
summary(mod.koyck)
## 
## Call:
## lm(formula = t.approve ~ lag.approve + t.s11, data = bush.2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.4769 -1.7113 -0.3193  1.0393  9.7688 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.84870    2.42917   1.584     0.12    
## lag.approve  0.92955    0.03955  23.504  < 2e-16 ***
## t.s11       24.06438    3.02411   7.958 3.42e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.98 on 46 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.9272, Adjusted R-squared:  0.9241 
## F-statistic: 293.1 on 2 and 46 DF,  p-value: < 2.2e-16
#Comparing coefficients
unrestricted<-mod.unrestricted$coef[-1]
koyck<-c(24.0644,24.0644*.9296,24.0644*.9296^2,
         24.0644*.9296^3,24.0644*.9296^4,24.0644*.9296^5)

plot(y=unrestricted,x=c(0:5),ylim=c(0,28),type='h',
     col='blue',main="Comparing Effects:Unrestricted in Blue")
par(new=T)
plot(y=koyck,x=c(0:5+.1),ylim=c(0,28),xlim=c(0,5),
     xlab="",ylab="",axes=F,type='h')
abline(h=0, col='gray60')

#Estimate an Almon Model
basis.s11<-crossbasis(bush$s11, vartype="poly", vardegree=2)
basis.t<-crossbasis(bush$t, vartype="poly", vardegree=2)
mod.almon<-lm(approve~basis.s11+iraq,data=bush)
summary(mod.almon)
## 
## Call:
## lm(formula = approve ~ basis.s11 + iraq, data = bush)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -14.273  -8.627  -3.345   5.477  24.913 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   59.692      1.564  38.166   <2e-16 ***
## basis.s11     22.231     13.655   1.628    0.110    
## iraq           2.542     10.948   0.232    0.817    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.84 on 47 degrees of freedom
## Multiple R-squared:  0.05414,    Adjusted R-squared:  0.01389 
## F-statistic: 1.345 on 2 and 47 DF,  p-value: 0.2703
mod.almon.2<-lm(approve~basis.t+s11+iraq,data=bush)
summary(mod.almon.2)
## 
## Call:
## lm(formula = approve ~ basis.t + s11 + iraq, data = bush)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -23.9082  -4.6723  -0.4318   4.0773  19.1607 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   69.328      2.660  26.060  < 2e-16 ***
## basis.t      -23.733      5.654  -4.197 0.000122 ***
## s11           11.296      9.538   1.184 0.242369    
## iraq           3.003      9.411   0.319 0.751082    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.314 on 46 degrees of freedom
## Multiple R-squared:  0.3161, Adjusted R-squared:  0.2715 
## F-statistic: 7.086 on 3 and 46 DF,  p-value: 0.0005173
############################################################

0.1.6 Feasible GLS and Additional Lag Structures

##MONTE CARLO CODE##
#What if we have serial correlation and use an LDV?
#How does the Koyck model fare in recovering the truth?
rm(list=ls())
library(TSA)
library(orcutt)
set.seed(10062010)

T<-500
nu<-rnorm(T+50)
d<-rnorm(T+50)

x0 <- rep(NA,T+50)
y0 <- rep(NA,T+50)
e0 <- rep(NA,T+50)
x0[1] <- d[1]
y0[1]<- e0[1] <- nu[1]


for(t in 2:(T+50)){
    x0[t]<- .5*x0[t-1] + d[t]
    e0[t]<- .1*e0[t-1] + nu[t]
    y0[t]<- .8*y0[t-1] + .3*x0[t] +e0[t]
    }


y<-ts(y0[51:(T+50)])
x<-ts(x0[51:(T+50)])
time<-c(1:T)
plot(y=y, x=time,type='l',xlab='time',ylab='y')
lines(y=x, x=time, lty=2, col='blue')
axis(4)
mtext("x", side=4)

data2 <- ts.union(y, l.y=lag(y, -1), x)
mod.1 <- lm(y~l.y+x, data=data2); summary(mod.1)
## 
## Call:
## lm(formula = y ~ l.y + x, data = data2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -3.06028 -0.70522 -0.01461  0.65556  2.74562 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.02946    0.04498  -0.655    0.513    
## l.y          0.85042    0.02153  39.497  < 2e-16 ***
## x            0.29060    0.03956   7.345  8.5e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.002 on 496 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.7718, Adjusted R-squared:  0.7708 
## F-statistic: 838.6 on 2 and 496 DF,  p-value: < 2.2e-16
###Just autocorrelation with LDV v. FGLS###
rm(list=ls())
T<-500
nu<-rnorm(T+50)
d<-rnorm(T+50)

x0 <- rep(NA,T+50)
y0 <- rep(NA,T+50)
e0 <- rep(NA,T+50)
x0[1] <- d[1]
y0[1]<- e0[1] <- nu[1]


for(t in 2:(T+50)){
    x0[t]<- .5*x0[t-1] + d[t]
    e0[t]<- .4*e0[t-1] + nu[t]
    y0[t]<- .3*x0[t] + e0[t]
    }

y<-ts(y0[51:(T+50)])
x<-ts(x0[51:(T+50)])
time<-c(1:T)
plot(y=y, x=time,type='l',xlab='time',ylab='y')
lines(y=x, x=time, lty=2, col='blue')
axis(4)
mtext("x", side=4)

data2 <- ts.union(y, l.y=lag(y, -1), x)
mod.koyck <- lm(y~x+l.y, data=data2)
summary(mod.koyck)
## 
## Call:
## lm(formula = y ~ x + l.y, data = data2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -3.02492 -0.68798  0.01846  0.63734  2.99190 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.008148   0.044946  -0.181    0.856    
## x            0.227516   0.038532   5.905 6.57e-09 ***
## l.y          0.327687   0.041033   7.986 9.79e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.003 on 496 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.1901, Adjusted R-squared:  0.1869 
## F-statistic: 58.23 on 2 and 496 DF,  p-value: < 2.2e-16
mod.fgls.lag <- cochrane.orcutt(mod.koyck)
summary(mod.fgls.lag)
## Call:
## lm(formula = y ~ x + l.y, data = data2)
## 
##              Estimate Std. Error t value  Pr(>|t|)    
## (Intercept) -0.013375   0.074195  -0.180    0.8570    
## x            0.283946   0.043831   6.478 2.244e-10 ***
## l.y         -0.049168   0.043197  -1.138    0.2556    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9959 on 495 degrees of freedom
## Multiple R-squared:  0.0796 ,  Adjusted R-squared:  0.0759
## F-statistic: 21.4 on 2 and 495 DF,  p-value: < 1.202e-09
## 
## Durbin-Watson statistic 
## (original):    1.94459 , p-value: 2.551e-01
## (transformed): 1.98483 , p-value: 4.317e-01
mod.nolag <- lm(y~x, data=data2)
mod.fgls.nolag <- cochrane.orcutt(mod.nolag)
summary(mod.fgls.nolag)
## Call:
## lm(formula = y ~ x, data = data2)
## 
##              Estimate Std. Error t value  Pr(>|t|)    
## (Intercept) -0.010121   0.069285  -0.146    0.8839    
## x            0.283456   0.043382   6.534 1.589e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9951 on 497 degrees of freedom
## Multiple R-squared:  0.0791 ,  Adjusted R-squared:  0.0773
## F-statistic: 42.7 on 1 and 497 DF,  p-value: < 1.589e-10
## 
## Durbin-Watson statistic 
## (original):    1.28523 , p-value: 4.806e-16
## (transformed): 1.99836 , p-value: 4.9e-01
cbind(c(mod.koyck$coefficients,NA), 
      c(mod.fgls.lag$coefficients,mod.fgls.lag$rho), 
      c(mod.fgls.nolag$coefficients,NA,mod.fgls.nolag$rho))
##                     [,1]        [,2]        [,3]
## (Intercept) -0.008148304 -0.01337517 -0.01012083
## x            0.227515899  0.28394573  0.28345581
## l.y          0.327687496 -0.04916794          NA
##                       NA  0.39834895  0.35677715
###Just functional form with LDV v. FGLS###
rm(list=ls())
T<-500
nu<-rnorm(T+50)
d<-rnorm(T+50)

x0 <- rep(NA,T+50)
y0 <- rep(NA,T+50)
e0 <- rep(NA,T+50)
x0[1] <- d[1]
y0[1]<- e0[1] <- nu[1]


for(t in 2:(T+50)){
    x0[t]<- .5*x0[t-1] + d[t]
    e0[t]<- nu[t]
    y0[t]<- .4*y0[t-1] + .3*x0[t] + e0[t]
    }

y<-ts(y0[51:(T+50)])
x<-ts(x0[51:(T+50)])
time<-c(1:T)
plot(y=y, x=time,type='l',xlab='time',ylab='y')
lines(y=x, x=time, lty=2, col='blue')
axis(4)
mtext("x", side=4)

data2 <- ts.union(y, l.y=lag(y, -1), x)
mod.koyck <- lm(y~x+l.y, data=data2)
summary(mod.koyck)
## 
## Call:
## lm(formula = y ~ x + l.y, data = data2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.9351 -0.6734 -0.0619  0.7154  3.7746 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.01251    0.04364   0.287    0.774    
## x            0.23714    0.03712   6.388 3.87e-10 ***
## l.y          0.43292    0.03865  11.201  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9744 on 496 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.2961, Adjusted R-squared:  0.2933 
## F-statistic: 104.3 on 2 and 496 DF,  p-value: < 2.2e-16
mod.fgls.lag <- cochrane.orcutt(mod.koyck)
summary(mod.fgls.lag)
## Call:
## lm(formula = y ~ x + l.y, data = data2)
## 
##             Estimate Std. Error t value  Pr(>|t|)    
## (Intercept) 0.013523   0.046736   0.289    0.7724    
## x           0.243893   0.038168   6.390 3.834e-10 ***
## l.y         0.384448   0.039722   9.678 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9748 on 495 degrees of freedom
## Multiple R-squared:  0.2527 ,  Adjusted R-squared:  0.2497
## F-statistic: 83.7 on 2 and 495 DF,  p-value: < 4.868e-32
## 
## Durbin-Watson statistic 
## (original):    1.96535 , p-value: 3.337e-01
## (transformed): 2.00304 , p-value: 4.987e-01
mod.nolag <- lm(y~x, data=data2)
mod.fgls.nolag <- cochrane.orcutt(mod.nolag)
summary(mod.fgls.nolag)
## Call:
## lm(formula = y ~ x, data = data2)
## 
##             Estimate Std. Error t value  Pr(>|t|)    
## (Intercept) 0.016079   0.079074   0.203     0.839    
## x           0.247686   0.041722   5.937 5.466e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9797 on 497 degrees of freedom
## Multiple R-squared:  0.0662 ,  Adjusted R-squared:  0.0643
## F-statistic: 35.2 on 1 and 497 DF,  p-value: < 5.466e-09
## 
## Durbin-Watson statistic 
## (original):    1.13855 , p-value: 1.953e-22
## (transformed): 1.98685 , p-value: 4.41e-01
cbind(c(mod.koyck$coefficients,NA), 
      c(mod.fgls.lag$coefficients,mod.fgls.lag$rho), 
      c(mod.fgls.nolag$coefficients,NA,mod.fgls.nolag$rho))
##                   [,1]       [,2]       [,3]
## (Intercept) 0.01251197 0.01352252 0.01607909
## x           0.23713503 0.24389315 0.24768556
## l.y         0.43292175 0.38444771         NA
##                     NA 0.06489019 0.44526542
##############################################################
#estimating two-step Aitken model.
#front matter
rm(list=ls())
#install.packages("lmtest")
#install.packages("dlnm")
library(lmtest)
library(dlnm)
library(foreign)
library(orcutt)
library(dyn)

#load data and declare as time series
qjps<-read.dta("QJPS113.dta")
ts.qjps<-ts(qjps)

#graph the data
par(mar=c(5,4,4,4))
plot(y=qjps$vi,x=qjps$time,type='l',
     xlab="Month",ylab="Vote Intention (Solid Black Line)",axes=F)
axis(1,at=seq(456,552,12),labels=c(1998:2006))
axis(2); box()

par(new=T)
plot(y=qjps$xrlag,x=qjps$time,type='l',lty=2,
     col='red',xlab="",ylab="",axes=F)
axis(4)
mtext("Exchange Rate (Red Dashed Line)",4,line=2.5)

#OLS with LDV
table.3.1.1<-dyn$lm(vi~lag(vi,-1)+usxr,data=ts.qjps)
summary(table.3.1.1)
## 
## Call:
## lm(formula = dyn(vi ~ lag(vi, -1) + usxr), data = ts.qjps)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14.1426  -1.4956   0.1575   1.4030  13.5110 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2.1317     3.6498   0.584     0.56    
## lag(vi, -1)   0.8187     0.0577  14.188   <2e-16 ***
## usxr         10.4187     7.0063   1.487     0.14    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.303 on 109 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.7456, Adjusted R-squared:  0.7409 
## F-statistic: 159.7 on 2 and 109 DF,  p-value: < 2.2e-16
lmtest::bgtest(table.3.1.1)
## 
##  Breusch-Godfrey test for serial correlation of order up to 1
## 
## data:  table.3.1.1
## LM test = 5.8403, df = 1, p-value = 0.01566
#Modeling lag as a function of lagged exchange rate (instrumental variable)
iv.step<-dyn$lm(lag(vi,-1)~lag(usxr,-1)+usxr,data=ts.qjps)
summary(iv.step)
## 
## Call:
## lm(formula = dyn(lag(vi, -1) ~ lag(usxr, -1) + usxr), data = ts.qjps)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -14.947  -3.508  -1.250   3.619  14.670 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept)      7.587      6.094   1.245   0.2158  
## lag(usxr, -1)  -26.544     45.563  -0.583   0.5614  
## usxr            91.202     45.065   2.024   0.0454 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.474 on 109 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.2939, Adjusted R-squared:  0.2809 
## F-statistic: 22.68 on 2 and 109 DF,  p-value: 5.817e-09
qjps$l.vi.hat<-c(NA,iv.step$fitted.values)

#reset as time series
ts.qjps<-ts(qjps)

#Step 2 Regression
table.3.1.2<-dyn$lm(vi~l.vi.hat+usxr,data=ts.qjps)
summary(table.3.1.2)
## 
## Call:
## lm(formula = dyn(vi ~ l.vi.hat + usxr), data = ts.qjps)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -15.319  -3.509  -1.339   3.980  12.777 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  10.1509    13.6871   0.742    0.460
## l.vi.hat     -0.3258     1.7472  -0.186    0.852
## usxr         85.4651   115.0038   0.743    0.459
## 
## Residual standard error: 5.572 on 109 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.276,  Adjusted R-squared:  0.2628 
## F-statistic: 20.78 on 2 and 109 DF,  p-value: 2.262e-08
bgtest(table.3.1.2)
## 
##  Breusch-Godfrey test for serial correlation of order up to 1
## 
## data:  table.3.1.2
## LM test = 75.287, df = 1, p-value < 2.2e-16
#get the right residuals
vi<-ts(qjps$vi)
l.vi<-lag(vi,-1)
usxr<-ts(qjps$usxr)
resid.data<-as.data.frame(ts.union(vi,l.vi,usxr))
#r1<-qjps$vi-10.1509+0.3258*qjps$vilag-85.4651*qjps$usxr
r2<-resid.data$vi-10.1509+0.3258*resid.data$l.vi-85.4651*resid.data$usxr


#What's the rho term?
arima(r2,order=c(1,0,0))
## 
## Call:
## arima(x = r2, order = c(1, 0, 0))
## 
## Coefficients:
##          ar1  intercept
##       0.8953     0.4381
## s.e.  0.0411     2.6426
## 
## sigma^2 estimated as 9.84:  log likelihood = -287.77,  aic = 579.54
rho<-acf(na.omit(r2),1)$acf[[1]];rho

## [1] 0.8842049
#generalized differences
resid.data$g.vi<-NA
resid.data$g.usxr<-NA
for(t in 2:dim(resid.data)[1]){
    resid.data$g.vi[t]<-resid.data$vi[t]-rho*resid.data$vi[t-1]
    resid.data$g.usxr[t]<-resid.data$usxr[t]-rho*resid.data$usxr[t-1]
}

#final model
resid.data<-ts(resid.data[-c(1,114),])
table.3.1.3<-dyn$lm(g.vi~lag(g.vi,-1)+g.usxr,data=resid.data)
summary(table.3.1.3)
## 
## Call:
## lm(formula = dyn(g.vi ~ lag(g.vi, -1) + g.usxr), data = resid.data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -12.9858  -1.5457   0.0666   1.5558   7.1528 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    6.32440    1.60952   3.929 0.000151 ***
## lag(g.vi, -1) -0.23718    0.08625  -2.750 0.006989 ** 
## g.usxr         3.73693   22.41442   0.167 0.867902    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.98 on 108 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.06576,    Adjusted R-squared:  0.04846 
## F-statistic: 3.801 on 2 and 108 DF,  p-value: 0.0254
############################################################

0.1.7 Structural Equations and Granger Causality Tests

#front matter
rm(list=ls())
#install.packages("lmtest")
#install.packages("dlnm")
library(lmtest)
library(dlnm)
library(foreign)
library(orcutt)
library(dyn)
library(systemfit)
## Loading required package: Matrix
## Loading required package: car
## Loading required package: carData
## 
## Please cite the 'systemfit' package as:
## Arne Henningsen and Jeff D. Hamann (2007). systemfit: A Package for Estimating Systems of Simultaneous Equations in R. Journal of Statistical Software 23(4), 1-40. http://www.jstatsoft.org/v23/i04/.
## 
## If you have questions, suggestions, or comments regarding the 'systemfit' package, please use a forum or 'tracker' at systemfit's R-Forge site:
## https://r-forge.r-project.org/projects/systemfit/
#load data and declare as time series
qjps<-read.dta("QJPS.dta")
ts.qjps<-ts(qjps)
qjps<-qjps[qjps$n>157,]

###Simultaneous Equation Model###
s1<-cpi~ir+usxr+xrlag1
s2<-ir~cpi+pm+pmlag1
inst <- ~ usxr+xrlag1+pm+pmlag1

table.4.3.1<-systemfit(list(cpi.mod=s1,ir.mod=s2),data=qjps,method="OLS")
summary(table.4.3.1)
## 
## systemfit results 
## method: OLS 
## 
##          N  DF     SSR detRCov   OLS-R2 McElroy-R2
## system 226 218 1307.72 5.18244 0.700934   0.814598
## 
##           N  DF       SSR       MSE     RMSE       R2   Adj R2
## cpi.mod 113 109 1237.2834 11.351224 3.369158 0.707024 0.698961
## ir.mod  113 109   70.4373  0.646214 0.803874 0.528911 0.515945
## 
## The covariance matrix of the residuals
##          cpi.mod   ir.mod
## cpi.mod 11.35122 1.467270
## ir.mod   1.46727 0.646214
## 
## The correlations of the residuals
##          cpi.mod   ir.mod
## cpi.mod 1.000000 0.541752
## ir.mod  0.541752 1.000000
## 
## 
## OLS estimates for 'cpi.mod' (equation 1)
## Model Formula: cpi ~ ir + usxr + xrlag1
## 
##               Estimate Std. Error   t value Pr(>|t|)    
## (Intercept) 156.017568   3.953925  39.45891  < 2e-16 ***
## ir           -3.701117   0.279837 -13.22599  < 2e-16 ***
## usxr         -8.358601  28.150272  -0.29693  0.76709    
## xrlag1      -43.667341  28.405818  -1.53727  0.12713    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.369158 on 109 degrees of freedom
## Number of observations: 113 Degrees of Freedom: 109 
## SSR: 1237.283423 MSE: 11.351224 Root MSE: 3.369158 
## Multiple R-Squared: 0.707024 Adjusted R-Squared: 0.698961 
## 
## 
## OLS estimates for 'ir.mod' (equation 2)
## Model Formula: ir ~ cpi + pm + pmlag1
## 
##                Estimate  Std. Error  t value   Pr(>|t|)    
## (Intercept) 13.03452875  3.34720513  3.89415 0.00017024 ***
## cpi         -0.08650602  0.02682997 -3.22423 0.00166699 ** 
## pm           0.01959371  0.01600461  1.22425 0.22349572    
## pmlag1       0.00580082  0.01439498  0.40298 0.68775579    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.803874 on 109 degrees of freedom
## Number of observations: 113 Degrees of Freedom: 109 
## SSR: 70.437308 MSE: 0.646214 Root MSE: 0.803874 
## Multiple R-Squared: 0.528911 Adjusted R-Squared: 0.515945
table.4.3.2<-systemfit(list(cpi.mod=s1,ir.mod=s2),
                       inst=inst,data=qjps,method="2SLS")
summary(table.4.3.2)
## 
## systemfit results 
## method: 2SLS 
## 
##          N  DF    SSR detRCov   OLS-R2 McElroy-R2
## system 226 218 1955.4 28.4058 0.552815   0.515335
## 
##           N  DF      SSR      MSE    RMSE        R2    Adj R2
## cpi.mod 113 109 1739.606 15.95969 3.99496  0.588080  0.576743
## ir.mod  113 109  215.793  1.97975 1.40704 -0.443237 -0.482959
## 
## The covariance matrix of the residuals
##          cpi.mod  ir.mod
## cpi.mod 15.95969 1.78617
## ir.mod   1.78617 1.97975
## 
## The correlations of the residuals
##          cpi.mod   ir.mod
## cpi.mod 1.000000 0.317764
## ir.mod  0.317764 1.000000
## 
## 
## 2SLS estimates for 'cpi.mod' (equation 1)
## Model Formula: cpi ~ ir + usxr + xrlag1
## Instruments: ~usxr + xrlag1 + pm + pmlag1
## 
##               Estimate Std. Error   t value Pr(>|t|)    
## (Intercept) 164.346777   4.887073  33.62888  < 2e-16 ***
## ir           -5.562665   0.452939 -12.28126  < 2e-16 ***
## usxr         23.681169  33.798162   0.70066 0.485005    
## xrlag1      -73.811562  34.050006  -2.16774 0.032354 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.994957 on 109 degrees of freedom
## Number of observations: 113 Degrees of Freedom: 109 
## SSR: 1739.605673 MSE: 15.959685 Root MSE: 3.994957 
## Multiple R-Squared: 0.58808 Adjusted R-Squared: 0.576743 
## 
## 
## 2SLS estimates for 'ir.mod' (equation 2)
## Model Formula: ir ~ cpi + pm + pmlag1
## Instruments: ~usxr + xrlag1 + pm + pmlag1
## 
##                Estimate  Std. Error  t value  Pr(>|t|)   
## (Intercept) -36.9937677  19.2515547 -1.92160 0.0572675 . 
## cpi           0.3158850   0.1547962  2.04065 0.0437021 * 
## pm            0.1369491   0.0513350  2.66775 0.0088007 **
## pmlag1        0.0564373   0.0312946  1.80342 0.0740852 . 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.407037 on 109 degrees of freedom
## Number of observations: 113 Degrees of Freedom: 109 
## SSR: 215.793103 MSE: 1.979753 Root MSE: 1.407037 
## Multiple R-Squared: -0.443237 Adjusted R-Squared: -0.482959
table.4.3.3<-systemfit(list(cpi.mod=s1,ir.mod=s2),
                       inst=inst,data=qjps,method="3SLS")
summary(table.4.3.3)
## 
## systemfit results 
## method: 3SLS 
## 
##          N  DF     SSR detRCov   OLS-R2 McElroy-R2
## system 226 218 1950.99 28.2562 0.553823   0.517459
## 
##           N  DF      SSR      MSE    RMSE        R2    Adj R2
## cpi.mod 113 109 1735.547 15.92245 3.99029  0.589041  0.577730
## ir.mod  113 109  215.442  1.97653 1.40589 -0.440889 -0.480547
## 
## The covariance matrix of the residuals used for estimation
##          cpi.mod  ir.mod
## cpi.mod 15.95969 1.78617
## ir.mod   1.78617 1.97975
## 
## The covariance matrix of the residuals
##          cpi.mod  ir.mod
## cpi.mod 15.92245 1.79305
## ir.mod   1.79305 1.97653
## 
## The correlations of the residuals
##         cpi.mod  ir.mod
## cpi.mod 1.00000 0.31962
## ir.mod  0.31962 1.00000
## 
## 
## 3SLS estimates for 'cpi.mod' (equation 1)
## Model Formula: cpi ~ ir + usxr + xrlag1
## Instruments: ~usxr + xrlag1 + pm + pmlag1
## 
##              Estimate Std. Error   t value Pr(>|t|)    
## (Intercept) 164.15753    4.88292  33.61872  < 2e-16 ***
## ir           -5.55250    0.45281 -12.26231  < 2e-16 ***
## usxr         13.84620   32.13604   0.43086 0.667419    
## xrlag1      -63.76429   32.32692  -1.97248 0.051087 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.990294 on 109 degrees of freedom
## Number of observations: 113 Degrees of Freedom: 109 
## SSR: 1735.547016 MSE: 15.92245 Root MSE: 3.990294 
## Multiple R-Squared: 0.589041 Adjusted R-Squared: 0.57773 
## 
## 
## 3SLS estimates for 'ir.mod' (equation 2)
## Model Formula: ir ~ cpi + pm + pmlag1
## Instruments: ~usxr + xrlag1 + pm + pmlag1
## 
##                Estimate  Std. Error  t value  Pr(>|t|)   
## (Intercept) -36.9294610  19.2488123 -1.91853 0.0576594 . 
## cpi           0.3153880   0.1547759  2.03771 0.0440009 * 
## pm            0.1382749   0.0508960  2.71681 0.0076681 **
## pmlag1        0.0548498   0.0302491  1.81327 0.0725421 . 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.405892 on 109 degrees of freedom
## Number of observations: 113 Degrees of Freedom: 109 
## SSR: 215.442046 MSE: 1.976533 Root MSE: 1.405892 
## Multiple R-Squared: -0.440889 Adjusted R-Squared: -0.480547
###Granger test: 12 lags###
cpi.lag.only<-dyn$lm(cpi~lag(cpi,-1)+lag(cpi,-2)+lag(cpi,-3)+lag(cpi,-4)+
                       lag(cpi,-5)+lag(cpi,-6)+lag(cpi,-7)+lag(cpi,-8)+
                       lag(cpi,-9)+lag(cpi,-10)+lag(cpi,-11)+lag(cpi,-12),
                     data=ts.qjps)
summary(cpi.lag.only)
## 
## Call:
## lm(formula = dyn(cpi ~ lag(cpi, -1) + lag(cpi, -2) + lag(cpi, 
##     -3) + lag(cpi, -4) + lag(cpi, -5) + lag(cpi, -6) + lag(cpi, 
##     -7) + lag(cpi, -8) + lag(cpi, -9) + lag(cpi, -10) + lag(cpi, 
##     -11) + lag(cpi, -12)), data = ts.qjps)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.99402 -0.17105 -0.01832  0.16813  1.81805 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    0.41291    0.13232   3.120  0.00202 ** 
## lag(cpi, -1)   1.11975    0.06382  17.547  < 2e-16 ***
## lag(cpi, -2)  -0.11177    0.09587  -1.166  0.24483    
## lag(cpi, -3)  -0.14885    0.09542  -1.560  0.12009    
## lag(cpi, -4)   0.15668    0.09502   1.649  0.10042    
## lag(cpi, -5)   0.01745    0.09553   0.183  0.85517    
## lag(cpi, -6)   0.15824    0.09439   1.676  0.09494 .  
## lag(cpi, -7)  -0.24562    0.09442  -2.601  0.00985 ** 
## lag(cpi, -8)   0.06259    0.09557   0.655  0.51313    
## lag(cpi, -9)  -0.20611    0.09574  -2.153  0.03232 *  
## lag(cpi, -10)  0.18972    0.09684   1.959  0.05124 .  
## lag(cpi, -11)  0.04116    0.09735   0.423  0.67279    
## lag(cpi, -12) -0.03535    0.06449  -0.548  0.58416    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3328 on 245 degrees of freedom
##   (24 observations deleted due to missingness)
## Multiple R-squared:  0.9997, Adjusted R-squared:  0.9996 
## F-statistic: 5.999e+04 on 12 and 245 DF,  p-value: < 2.2e-16
cpi.full<-dyn$lm(cpi~lag(cpi,-1)+lag(cpi,-2)+lag(cpi,-3)+lag(cpi,-4)+
                   lag(cpi,-5)+lag(cpi,-6)+lag(cpi,-7)+lag(cpi,-8)+
                   lag(cpi,-9)+lag(cpi,-10)+lag(cpi,-11)+lag(cpi,-12)+
                   lag(ir,-1)+lag(ir,-2)+lag(ir,-3)+lag(ir,-4)+lag(ir,-5)+
                   lag(ir,-6)+lag(ir,-7)+lag(ir,-8)+lag(ir,-9)+lag(ir,-10)+
                   lag(ir,-11)+lag(ir,-12),data=ts.qjps)
summary(cpi.full)
## 
## Call:
## lm(formula = dyn(cpi ~ lag(cpi, -1) + lag(cpi, -2) + lag(cpi, 
##     -3) + lag(cpi, -4) + lag(cpi, -5) + lag(cpi, -6) + lag(cpi, 
##     -7) + lag(cpi, -8) + lag(cpi, -9) + lag(cpi, -10) + lag(cpi, 
##     -11) + lag(cpi, -12) + lag(ir, -1) + lag(ir, -2) + lag(ir, 
##     -3) + lag(ir, -4) + lag(ir, -5) + lag(ir, -6) + lag(ir, -7) + 
##     lag(ir, -8) + lag(ir, -9) + lag(ir, -10) + lag(ir, -11) + 
##     lag(ir, -12)), data = ts.qjps)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.89517 -0.17484 -0.00708  0.16823  1.53117 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -0.634293   0.271712  -2.334  0.02042 *  
## lag(cpi, -1)   0.983199   0.065865  14.927  < 2e-16 ***
## lag(cpi, -2)  -0.097529   0.092011  -1.060  0.29025    
## lag(cpi, -3)  -0.130793   0.091586  -1.428  0.15460    
## lag(cpi, -4)   0.136000   0.091452   1.487  0.13833    
## lag(cpi, -5)  -0.002999   0.091925  -0.033  0.97400    
## lag(cpi, -6)   0.154568   0.090882   1.701  0.09032 .  
## lag(cpi, -7)  -0.232758   0.091303  -2.549  0.01144 *  
## lag(cpi, -8)   0.062402   0.092556   0.674  0.50085    
## lag(cpi, -9)  -0.147114   0.092663  -1.588  0.11373    
## lag(cpi, -10)  0.140856   0.093565   1.505  0.13357    
## lag(cpi, -11)  0.048085   0.093630   0.514  0.60804    
## lag(cpi, -12)  0.093170   0.067274   1.385  0.16740    
## lag(ir, -1)    0.110335   0.036018   3.063  0.00245 ** 
## lag(ir, -2)    0.012948   0.044259   0.293  0.77012    
## lag(ir, -3)    0.013603   0.042807   0.318  0.75095    
## lag(ir, -4)   -0.080518   0.040882  -1.970  0.05008 .  
## lag(ir, -5)    0.037984   0.040424   0.940  0.34838    
## lag(ir, -6)    0.025923   0.040154   0.646  0.51917    
## lag(ir, -7)    0.001241   0.040575   0.031  0.97564    
## lag(ir, -8)    0.007939   0.040592   0.196  0.84511    
## lag(ir, -9)   -0.013999   0.038523  -0.363  0.71664    
## lag(ir, -10)   0.038416   0.038208   1.005  0.31573    
## lag(ir, -11)  -0.043406   0.036749  -1.181  0.23875    
## lag(ir, -12)  -0.032090   0.028373  -1.131  0.25922    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3126 on 233 degrees of freedom
##   (24 observations deleted due to missingness)
## Multiple R-squared:  0.9997, Adjusted R-squared:  0.9997 
## F-statistic: 3.4e+04 on 24 and 233 DF,  p-value: < 2.2e-16
anova(cpi.full,cpi.lag.only)
## Analysis of Variance Table
## 
## Model 1: cpi ~ lag(cpi, -1) + lag(cpi, -2) + lag(cpi, -3) + lag(cpi, -4) + 
##     lag(cpi, -5) + lag(cpi, -6) + lag(cpi, -7) + lag(cpi, -8) + 
##     lag(cpi, -9) + lag(cpi, -10) + lag(cpi, -11) + lag(cpi, -12) + 
##     lag(ir, -1) + lag(ir, -2) + lag(ir, -3) + lag(ir, -4) + lag(ir, 
##     -5) + lag(ir, -6) + lag(ir, -7) + lag(ir, -8) + lag(ir, -9) + 
##     lag(ir, -10) + lag(ir, -11) + lag(ir, -12)
## Model 2: cpi ~ lag(cpi, -1) + lag(cpi, -2) + lag(cpi, -3) + lag(cpi, -4) + 
##     lag(cpi, -5) + lag(cpi, -6) + lag(cpi, -7) + lag(cpi, -8) + 
##     lag(cpi, -9) + lag(cpi, -10) + lag(cpi, -11) + lag(cpi, -12)
##   Res.Df    RSS  Df Sum of Sq     F    Pr(>F)    
## 1    233 22.773                                  
## 2    245 27.142 -12    -4.369 3.725 3.666e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ir.lag.only<-dyn$lm(ir~lag(ir,-1)+lag(ir,-2)+lag(ir,-3)+lag(ir,-4)+
                      lag(ir,-5)+lag(ir,-6)+lag(ir,-7)+lag(ir,-8)+
                      lag(ir,-9)+lag(ir,-10)+lag(ir,-11)+lag(ir,-12),
                    data=ts.qjps)
summary(ir.lag.only)
## 
## Call:
## lm(formula = dyn(ir ~ lag(ir, -1) + lag(ir, -2) + lag(ir, -3) + 
##     lag(ir, -4) + lag(ir, -5) + lag(ir, -6) + lag(ir, -7) + lag(ir, 
##     -8) + lag(ir, -9) + lag(ir, -10) + lag(ir, -11) + lag(ir, 
##     -12)), data = ts.qjps)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.15071 -0.24305 -0.01453  0.20893  2.43682 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.138286   0.094218   1.468  0.14346    
## lag(ir, -1)   0.809780   0.063428  12.767  < 2e-16 ***
## lag(ir, -2)   0.234107   0.081857   2.860  0.00460 ** 
## lag(ir, -3)  -0.102744   0.079026  -1.300  0.19478    
## lag(ir, -4)   0.019182   0.075635   0.254  0.80001    
## lag(ir, -5)   0.111522   0.074239   1.502  0.13433    
## lag(ir, -6)  -0.202014   0.073754  -2.739  0.00661 ** 
## lag(ir, -7)   0.105702   0.074590   1.417  0.15772    
## lag(ir, -8)   0.007061   0.074721   0.095  0.92479    
## lag(ir, -9)  -0.003964   0.070756  -0.056  0.95537    
## lag(ir, -10)  0.095079   0.070195   1.354  0.17683    
## lag(ir, -11) -0.004996   0.067811  -0.074  0.94133    
## lag(ir, -12) -0.092204   0.051588  -1.787  0.07512 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5886 on 245 degrees of freedom
##   (24 observations deleted due to missingness)
## Multiple R-squared:  0.9706, Adjusted R-squared:  0.9692 
## F-statistic: 674.2 on 12 and 245 DF,  p-value: < 2.2e-16
ir.full<-dyn$lm(ir~lag(ir,-1)+lag(ir,-2)+lag(ir,-3)+lag(ir,-4)+
                  lag(ir,-5)+lag(ir,-6)+lag(ir,-7)+lag(ir,-8)+
                  lag(ir,-9)+lag(ir,-10)+lag(ir,-11)+lag(ir,-12)+
                  lag(cpi,-1)+lag(cpi,-2)+lag(cpi,-3)+lag(cpi,-4)+
                  lag(cpi,-5)+lag(cpi,-6)+lag(cpi,-7)+lag(cpi,-8)+
                  lag(cpi,-9)+lag(cpi,-10)+lag(cpi,-11)+lag(cpi,-12),
                data=ts.qjps)
summary(ir.full)
## 
## Call:
## lm(formula = dyn(ir ~ lag(ir, -1) + lag(ir, -2) + lag(ir, -3) + 
##     lag(ir, -4) + lag(ir, -5) + lag(ir, -6) + lag(ir, -7) + lag(ir, 
##     -8) + lag(ir, -9) + lag(ir, -10) + lag(ir, -11) + lag(ir, 
##     -12) + lag(cpi, -1) + lag(cpi, -2) + lag(cpi, -3) + lag(cpi, 
##     -4) + lag(cpi, -5) + lag(cpi, -6) + lag(cpi, -7) + lag(cpi, 
##     -8) + lag(cpi, -9) + lag(cpi, -10) + lag(cpi, -11) + lag(cpi, 
##     -12)), data = ts.qjps)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.16549 -0.26191  0.00632  0.25444  2.30835 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    1.629006   0.497254   3.276  0.00121 ** 
## lag(ir, -1)    0.726639   0.065915  11.024  < 2e-16 ***
## lag(ir, -2)    0.208562   0.080997   2.575  0.01064 *  
## lag(ir, -3)   -0.090662   0.078340  -1.157  0.24834    
## lag(ir, -4)    0.006675   0.074817   0.089  0.92898    
## lag(ir, -5)    0.084879   0.073980   1.147  0.25242    
## lag(ir, -6)   -0.186244   0.073485  -2.534  0.01192 *  
## lag(ir, -7)    0.093743   0.074255   1.262  0.20805    
## lag(ir, -8)    0.010252   0.074286   0.138  0.89036    
## lag(ir, -9)    0.034446   0.070499   0.489  0.62559    
## lag(ir, -10)   0.076992   0.069924   1.101  0.27199    
## lag(ir, -11)  -0.008874   0.067254  -0.132  0.89514    
## lag(ir, -12)  -0.078914   0.051925  -1.520  0.12993    
## lag(cpi, -1)   0.195327   0.120538   1.620  0.10649    
## lag(cpi, -2)   0.076149   0.168387   0.452  0.65153    
## lag(cpi, -3)  -0.092792   0.167609  -0.554  0.58037    
## lag(cpi, -4)   0.108255   0.167364   0.647  0.51838    
## lag(cpi, -5)   0.021274   0.168230   0.126  0.89948    
## lag(cpi, -6)  -0.235922   0.166322  -1.418  0.15739    
## lag(cpi, -7)   0.090293   0.167093   0.540  0.58945    
## lag(cpi, -8)  -0.221096   0.169385  -1.305  0.19308    
## lag(cpi, -9)   0.196854   0.169582   1.161  0.24690    
## lag(cpi, -10)  0.051456   0.171232   0.301  0.76406    
## lag(cpi, -11) -0.175315   0.171350  -1.023  0.30730    
## lag(cpi, -12) -0.027991   0.123116  -0.227  0.82035    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5721 on 233 degrees of freedom
##   (24 observations deleted due to missingness)
## Multiple R-squared:  0.9736, Adjusted R-squared:  0.9709 
## F-statistic: 357.9 on 24 and 233 DF,  p-value: < 2.2e-16
anova(ir.full,ir.lag.only)
## Analysis of Variance Table
## 
## Model 1: ir ~ lag(ir, -1) + lag(ir, -2) + lag(ir, -3) + lag(ir, -4) + 
##     lag(ir, -5) + lag(ir, -6) + lag(ir, -7) + lag(ir, -8) + lag(ir, 
##     -9) + lag(ir, -10) + lag(ir, -11) + lag(ir, -12) + lag(cpi, 
##     -1) + lag(cpi, -2) + lag(cpi, -3) + lag(cpi, -4) + lag(cpi, 
##     -5) + lag(cpi, -6) + lag(cpi, -7) + lag(cpi, -8) + lag(cpi, 
##     -9) + lag(cpi, -10) + lag(cpi, -11) + lag(cpi, -12)
## Model 2: ir ~ lag(ir, -1) + lag(ir, -2) + lag(ir, -3) + lag(ir, -4) + 
##     lag(ir, -5) + lag(ir, -6) + lag(ir, -7) + lag(ir, -8) + lag(ir, 
##     -9) + lag(ir, -10) + lag(ir, -11) + lag(ir, -12)
##   Res.Df    RSS  Df Sum of Sq      F  Pr(>F)  
## 1    233 76.273                               
## 2    245 84.878 -12   -8.6051 2.1906 0.01289 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
###Granger test: 4 lags###
cpi.lag.only.4<-dyn$lm(cpi~lag(cpi,-1)+lag(cpi,-2)+lag(cpi,-3)+lag(cpi,-4),
                       data=ts.qjps)
summary(cpi.lag.only.4)
## 
## Call:
## lm(formula = dyn(cpi ~ lag(cpi, -1) + lag(cpi, -2) + lag(cpi, 
##     -3) + lag(cpi, -4)), data = ts.qjps)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.9462 -0.1850 -0.0174  0.1911  1.9337 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.46440    0.10808   4.297 2.45e-05 ***
## lag(cpi, -1)  1.12383    0.06008  18.707  < 2e-16 ***
## lag(cpi, -2) -0.13534    0.09095  -1.488 0.137949    
## lag(cpi, -3) -0.22444    0.09108  -2.464 0.014382 *  
## lag(cpi, -4)  0.23366    0.06005   3.891 0.000127 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3407 on 261 degrees of freedom
##   (8 observations deleted due to missingness)
## Multiple R-squared:  0.9997, Adjusted R-squared:  0.9997 
## F-statistic: 1.912e+05 on 4 and 261 DF,  p-value: < 2.2e-16
cpi.full.4<-dyn$lm(cpi~lag(cpi,-1)+lag(cpi,-2)+lag(cpi,-3)+lag(cpi,-4)+
                     lag(ir,-1)+lag(ir,-2)+lag(ir,-3)+lag(ir,-4),
                   data=ts.qjps)
summary(cpi.full.4)
## 
## Call:
## lm(formula = dyn(cpi ~ lag(cpi, -1) + lag(cpi, -2) + lag(cpi, 
##     -3) + lag(cpi, -4) + lag(ir, -1) + lag(ir, -2) + lag(ir, 
##     -3) + lag(ir, -4)), data = ts.qjps)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.94473 -0.16541 -0.01108  0.16659  1.79147 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -0.28097    0.22790  -1.233 0.218744    
## lag(cpi, -1)  1.06963    0.05895  18.146  < 2e-16 ***
## lag(cpi, -2) -0.12067    0.08756  -1.378 0.169343    
## lag(cpi, -3) -0.23088    0.08783  -2.629 0.009090 ** 
## lag(cpi, -4)  0.28524    0.05918   4.820 2.46e-06 ***
## lag(ir, -1)   0.10257    0.02961   3.464 0.000622 ***
## lag(ir, -2)  -0.03079    0.03844  -0.801 0.423779    
## lag(ir, -3)   0.03624    0.03714   0.976 0.330154    
## lag(ir, -4)  -0.06980    0.02838  -2.459 0.014588 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.326 on 257 degrees of freedom
##   (8 observations deleted due to missingness)
## Multiple R-squared:  0.9997, Adjusted R-squared:  0.9997 
## F-statistic: 1.045e+05 on 8 and 257 DF,  p-value: < 2.2e-16
anova(cpi.full.4,cpi.lag.only.4)
## Analysis of Variance Table
## 
## Model 1: cpi ~ lag(cpi, -1) + lag(cpi, -2) + lag(cpi, -3) + lag(cpi, -4) + 
##     lag(ir, -1) + lag(ir, -2) + lag(ir, -3) + lag(ir, -4)
## Model 2: cpi ~ lag(cpi, -1) + lag(cpi, -2) + lag(cpi, -3) + lag(cpi, -4)
##   Res.Df    RSS Df Sum of Sq      F   Pr(>F)    
## 1    257 27.313                                 
## 2    261 30.304 -4   -2.9912 7.0365 2.17e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ir.lag.only.4<-dyn$lm(ir~lag(ir,-1)+lag(ir,-2)+lag(ir,-3)+lag(ir,-4),
                      data=ts.qjps)
summary(ir.lag.only.4)
## 
## Call:
## lm(formula = dyn(ir ~ lag(ir, -1) + lag(ir, -2) + lag(ir, -3) + 
##     lag(ir, -4)), data = ts.qjps)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.2707 -0.2835  0.0066  0.2200  4.4356 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.10116    0.10103   1.001   0.3176    
## lag(ir, -1)  0.81953    0.05842  14.029   <2e-16 ***
## lag(ir, -2)  0.16754    0.07695   2.177   0.0304 *  
## lag(ir, -3) -0.02140    0.07450  -0.287   0.7741    
## lag(ir, -4)  0.01727    0.05670   0.304   0.7610    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6576 on 261 degrees of freedom
##   (8 observations deleted due to missingness)
## Multiple R-squared:  0.963,  Adjusted R-squared:  0.9624 
## F-statistic:  1698 on 4 and 261 DF,  p-value: < 2.2e-16
ir.full.4<-dyn$lm(ir~lag(ir,-1)+lag(ir,-2)+lag(ir,-3)+lag(ir,-4)+
                    lag(cpi,-1)+lag(cpi,-2)+lag(cpi,-3)+lag(cpi,-4),
                  data=ts.qjps)
summary(ir.full.4)
## 
## Call:
## lm(formula = dyn(ir ~ lag(ir, -1) + lag(ir, -2) + lag(ir, -3) + 
##     lag(ir, -4) + lag(cpi, -1) + lag(cpi, -2) + lag(cpi, -3) + 
##     lag(cpi, -4)), data = ts.qjps)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.3024 -0.2824  0.0266  0.2453  4.2411 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.15920    0.45550   2.545   0.0115 *  
## lag(ir, -1)   0.78797    0.05917  13.316   <2e-16 ***
## lag(ir, -2)   0.16350    0.07682   2.128   0.0343 *  
## lag(ir, -3)  -0.03079    0.07424  -0.415   0.6787    
## lag(ir, -4)   0.01576    0.05673   0.278   0.7813    
## lag(cpi, -1)  0.01351    0.11781   0.115   0.9088    
## lag(cpi, -2)  0.16169    0.17500   0.924   0.3564    
## lag(cpi, -3) -0.03366    0.17555  -0.192   0.8481    
## lag(cpi, -4) -0.15051    0.11828  -1.272   0.2043    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6516 on 257 degrees of freedom
##   (8 observations deleted due to missingness)
## Multiple R-squared:  0.9642, Adjusted R-squared:  0.9631 
## F-statistic: 865.9 on 8 and 257 DF,  p-value: < 2.2e-16
anova(ir.full.4,ir.lag.only.4)
## Analysis of Variance Table
## 
## Model 1: ir ~ lag(ir, -1) + lag(ir, -2) + lag(ir, -3) + lag(ir, -4) + 
##     lag(cpi, -1) + lag(cpi, -2) + lag(cpi, -3) + lag(cpi, -4)
## Model 2: ir ~ lag(ir, -1) + lag(ir, -2) + lag(ir, -3) + lag(ir, -4)
##   Res.Df    RSS Df Sum of Sq      F  Pr(>F)  
## 1    257 109.11                              
## 2    261 112.87 -4   -3.7584 2.2132 0.06801 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
###Doing 2SLS the hard way###
iv.ir<-lm(ir~usxr+xrlag1+pm+pmlag1,data=qjps)
summary(iv.ir)
## 
## Call:
## lm(formula = ir ~ usxr + xrlag1 + pm + pmlag1, data = qjps)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.43061 -0.55177 -0.09273  0.50482  1.93529 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   5.46801    0.88256   6.196 1.08e-08 ***
## usxr          6.64162    6.56012   1.012 0.313598    
## xrlag1      -12.45636    6.57247  -1.895 0.060736 .  
## pm            0.04789    0.01374   3.485 0.000713 ***
## pmlag1        0.02204    0.01381   1.596 0.113323    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7886 on 108 degrees of freedom
## Multiple R-squared:  0.5508, Adjusted R-squared:  0.5342 
## F-statistic: 33.11 on 4 and 108 DF,  p-value: < 2.2e-16
qjps$ir.hat<-iv.ir$fitted.values
cpi.stage.2<-lm(cpi~ir.hat+usxr+xrlag1,data=qjps)
summary(cpi.stage.2)
## 
## Call:
## lm(formula = cpi ~ ir.hat + usxr + xrlag1, data = qjps)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.6165 -1.8963 -0.3378  1.5081  8.4451 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 164.3468     3.3465  49.109  < 2e-16 ***
## ir.hat       -5.5627     0.3102 -17.935  < 2e-16 ***
## usxr         23.6812    23.1441   1.023  0.30848    
## xrlag1      -73.8116    23.3166  -3.166  0.00201 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.736 on 109 degrees of freedom
## Multiple R-squared:  0.8068, Adjusted R-squared:  0.8015 
## F-statistic: 151.8 on 3 and 109 DF,  p-value: < 2.2e-16
iv.cpi<-lm(cpi~usxr+xrlag1+pm+pmlag1,data=qjps)
summary(iv.cpi)
## 
## Call:
## lm(formula = cpi ~ usxr + xrlag1 + pm + pmlag1, data = qjps)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.1700 -1.8766 -0.4139  1.4373  8.4114 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 133.95144    3.07459  43.567  < 2e-16 ***
## usxr        -13.26649   22.85356  -0.581   0.5628    
## xrlag1       -4.59976   22.89660  -0.201   0.8412    
## pm           -0.27964    0.04787  -5.841 5.55e-08 ***
## pmlag1       -0.10873    0.04810  -2.260   0.0258 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.747 on 108 degrees of freedom
## Multiple R-squared:  0.807,  Adjusted R-squared:  0.7998 
## F-statistic: 112.9 on 4 and 108 DF,  p-value: < 2.2e-16
qjps$cpi.hat<-iv.cpi$fitted.values
ir.stage.2<-lm(ir~cpi.hat+pm+pmlag1,data=qjps)
summary(cpi.stage.2)
## 
## Call:
## lm(formula = cpi ~ ir.hat + usxr + xrlag1, data = qjps)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.6165 -1.8963 -0.3378  1.5081  8.4451 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 164.3468     3.3465  49.109  < 2e-16 ***
## ir.hat       -5.5627     0.3102 -17.935  < 2e-16 ***
## usxr         23.6812    23.1441   1.023  0.30848    
## xrlag1      -73.8116    23.3166  -3.166  0.00201 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.736 on 109 degrees of freedom
## Multiple R-squared:  0.8068, Adjusted R-squared:  0.8015 
## F-statistic: 151.8 on 3 and 109 DF,  p-value: < 2.2e-16
############################################################

#Example direct Granger test in R

#load data and create ts.union
data<-read.csv('IParmsRace.csv')

y<-as.ts(data$India)
x<-as.ts(data$Pakistan)
l.y<-lag(data$India,-1)
l2.y<-lag(data$India,-2)
l3.y<-lag(data$India,-3)
l4.y<-lag(data$India,-4)
l.x<-lag(data$Pakistan,-1)
l2.x<-lag(data$Pakistan,-2)
l3.x<-lag(data$Pakistan,-3)
l4.x<-lag(data$Pakistan,-4)
data.2<-ts.union(y,x,l.y,l2.y,l3.y,l4.y,l.x,l2.x,l3.x,l4.x,dframe=TRUE)
#I'm going with 4 lags here, but it's up to you!

#Suppose I believe Pakistan's expenditures cause India's.
#Start with linear models of y.
y.with.x<-lm(y~l.y+l2.y+l3.y+l4.y+l.x+l2.x+l3.x+l4.x,data=data.2)
y.without.x<-lm(y~l.y+l2.y+l3.y+l4.y,data=data.2)

#Block F-test that Pakistan informs our conditional expectation of India:
anova(y.with.x, y.without.x)
## Analysis of Variance Table
## 
## Model 1: y ~ l.y + l2.y + l3.y + l4.y + l.x + l2.x + l3.x + l4.x
## Model 2: y ~ l.y + l2.y + l3.y + l4.y
##   Res.Df     RSS Df Sum of Sq      F Pr(>F)
## 1      3  453103                           
## 2      7 1754921 -4  -1301817 2.1548 0.2772
#It's not significant.

#Does India's expenditures cause Pakistan's?
x.with.y<-lm(x~l.x+l2.x+l3.x+l4.x+l.y+l2.y+l3.y+l4.y,data=data.2)
x.without.y<-lm(x~l.x+l2.x+l3.x+l4.x,data=data.2)

#Block F-test that India informs our conditional expectation of Pakistan:
anova(x.with.y, x.without.y)
## Analysis of Variance Table
## 
## Model 1: x ~ l.x + l2.x + l3.x + l4.x + l.y + l2.y + l3.y + l4.y
## Model 2: x ~ l.x + l2.x + l3.x + l4.x
##   Res.Df   RSS Df Sum of Sq      F  Pr(>F)  
## 1      3  2254                              
## 2      7 74298 -4    -72045 23.976 0.01297 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#It's significant, which suggests India's expenditures are 
#exogenous to Pakistan's (consistent with Freeman 1983).

#QUICK ROBUSTNESS CHECK: TWO LAGS
#Does Pakistan influence India?
y.with.x<-lm(y~l.y+l2.y+l.x+l2.x,data=data.2)
y.without.x<-lm(y~l.y+l2.y,data=data.2)
anova(y.with.x, y.without.x)
## Analysis of Variance Table
## 
## Model 1: y ~ l.y + l2.y + l.x + l2.x
## Model 2: y ~ l.y + l2.y
##   Res.Df     RSS Df Sum of Sq      F Pr(>F)
## 1      9 1791150                           
## 2     11 2746479 -2   -955329 2.4001 0.1461
#Does India Influence Pakistan?
x.with.y<-lm(x~l.x+l2.x+l.y+l2.y,data=data.2)
x.without.y<-lm(x~l.x+l2.x,data=data.2)
anova(x.with.y, x.without.y)
## Analysis of Variance Table
## 
## Model 1: x ~ l.x + l2.x + l.y + l2.y
## Model 2: x ~ l.x + l2.x
##   Res.Df   RSS Df Sum of Sq      F  Pr(>F)  
## 1      9 45070                              
## 2     11 87800 -2    -42730 4.2663 0.04975 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Same findings.
############################################################

0.1.8 Vector Autoregression

#front matter
rm(list=ls())
library(vars)
## Loading required package: MASS
## Loading required package: strucchange
## Loading required package: sandwich
## Loading required package: urca
#load data on the Canadian economy: productivity, employment, unemployment, real wage
data(Canada)

#run a VAR model, Estimation of a VAR by utilising OLS per equation.
var.model <-vars::VAR(Canada, p=2, type="const") 
#type could also be "none", "trend", or "both"

#allow the BIC to choose the best lag length
var.model.2 <-VAR(Canada, p=2, lag.max=10, ic="AIC", type="const")

#Assess the model
plot(var.model)

#test for serial correlation
vars::serial.test(var.model, lags.pt = 16, type = "PT.adjusted")
## 
##  Portmanteau Test (adjusted)
## 
## data:  Residuals of VAR object var.model
## Chi-squared = 231.59, df = 224, p-value = 0.3497
#Does employment have a Granger causal effect?
vars::causality(var.model, cause="e")
## $Granger
## 
##  Granger causality H0: e do not Granger-cause prod rw U
## 
## data:  VAR object var.model
## F-Test = 6.2768, df1 = 6, df2 = 292, p-value = 3.206e-06
## 
## 
## $Instant
## 
##  H0: No instantaneous causality between: e and prod rw U
## 
## data:  VAR object var.model
## Chi-squared = 26.068, df = 3, p-value = 9.228e-06
#What's the impulse response if we perturb employment?
var.model.irf <- vars::irf(var.model, impulse = "e", 
                           response = c("prod", "rw", "U"), boot =FALSE)

#Impulse response analysis
plot(var.model.irf)

#the following is necessary if you do not want to use the whole data set in your VAR model:
Canada.2<-as.data.frame(Canada)
e<-ts(Canada.2$e)
rw<-ts(Canada.2$rw)
U<-ts(Canada.2$U)
prod<-ts(Canada.2$prod)
Canada.3<-ts.union(e,rw,U,dframe=TRUE)

#run a VAR model where productivity is exogenous
var.with.exogenous <-VAR(Canada.3, p=2, type="const", exogen=prod)
## Warning in VAR(Canada.3, p = 2, type = "const", exogen = prod): No column names supplied in exogen, using: exo1 , instead.
####################################################################
###SOURCE: Patrick Brandt's documentation for MSBVAR on CRAN.###
#install.packages("MSBVAR")
#install.packages("bit")
library(MSBVAR)

#load data
data(IsraelPalestineConflict)

#create a vector of variable names
varnames <- colnames(IsraelPalestineConflict)

#specify Bayesian Vector Autoregression Model
fit.BVAR <- szbvar(IsraelPalestineConflict, p=6, z=NULL,
                           lambda0=0.6, lambda1=0.1,
                            lambda3=2, lambda4=0.25, lambda5=0, mu5=0,
                            mu6=0, nu=3, qm=4,
                            prior=0, posterior.fit=FALSE)

# Draw from the posterior pdf of the impulse responses.
posterior.impulses <- mc.irf(fit.BVAR, nsteps=10, draws=5000)

# Plot the responses
plot(posterior.impulses, method=c("Sims-Zha2"), component=1,
         probs=c(0.16,0.84), varnames=varnames) 
####################################################################

0.1.9 Univariate, Nonstationary Processes

#clean up
rm(list=ls())

#load packages
library(foreign)
library(aTSA) #for Dickey-Fuller test
## 
## Attaching package: 'aTSA'
## The following object is masked from 'package:vars':
## 
##     arch.test
## The following object is masked from 'package:graphics':
## 
##     identify
library(tseries) #for KPSS test
## 
## Attaching package: 'tseries'
## The following objects are masked from 'package:aTSA':
## 
##     adf.test, kpss.test, pp.test
#load data
macro<-read.dta("partyid.dta")

#descriptives and line plots
summary(macro)
##    macropart         repub          democrat         time          
##  Min.   :51.00   Min.   :20.32   Min.   :27.79   Length:160        
##  1st Qu.:58.10   1st Qu.:25.07   1st Qu.:41.36   Class :character  
##  Median :61.15   Median :27.57   Median :44.08   Mode  :character  
##  Mean   :61.27   Mean   :27.90   Mean   :42.90                     
##  3rd Qu.:65.12   3rd Qu.:31.16   3rd Qu.:45.94                     
##  Max.   :69.50   Max.   :35.40   Max.   :52.82                     
##      qdate           consumer           papp      
##  Min.   :-28.00   Min.   : 47.57   Min.   :26.30  
##  1st Qu.: 11.75   1st Qu.: 73.63   1st Qu.:47.22  
##  Median : 51.50   Median : 86.08   Median :57.59  
##  Mean   : 51.50   Mean   : 83.30   Mean   :56.45  
##  3rd Qu.: 91.25   3rd Qu.: 94.92   3rd Qu.:64.83  
##  Max.   :131.00   Max.   :106.17   Max.   :80.81
plot(y=macro$macropart,x=macro$qdate,type='l',
     xlab="Time",ylab="Macropartisanship",axes=F)
axis(1,at=seq(from=-28,to=131,by=16),
     label=seq(from=1953,to=1992,by=4))
axis(2);box()

plot(y=macro$consumer,x=macro$qdate,type='l',
     xlab="Time",ylab="Consumer Sentiment",axes=F)
axis(1,at=seq(from=-28,to=131,by=16),
     label=seq(from=1953,to=1992,by=4));axis(2);box()

plot(y=macro$papp,x=macro$qdate,type='l',
     xlab="Time",ylab="Presidential Approval",axes=F)
axis(1,at=seq(from=-28,to=131,by=16),
     label=seq(from=1953,to=1992,by=4));axis(2);box()

#ACF and PACF of each series
acf(macro$macropart,16)

pacf(macro$macropart,16)

print(acf(macro$macropart,16))

## 
## Autocorrelations of series 'macro$macropart', by lag
## 
##     1     2     3     4     5     6     7     8     9    10    11    12 
## 0.927 0.871 0.831 0.772 0.704 0.652 0.608 0.572 0.559 0.531 0.510 0.495 
##    13    14    15    16 
## 0.470 0.432 0.394 0.366
acf(macro$consumer,16)

pacf(macro$consumer,16)

print(acf(macro$consumer,16))

## 
## Autocorrelations of series 'macro$consumer', by lag
## 
##     1     2     3     4     5     6     7     8     9    10    11    12 
## 0.939 0.867 0.794 0.713 0.637 0.571 0.512 0.459 0.422 0.407 0.404 0.401 
##    13    14    15    16 
## 0.401 0.397 0.389 0.384
acf(macro$papp,16)

pacf(macro$papp,16)

print(acf(macro$papp,16))

## 
## Autocorrelations of series 'macro$papp', by lag
## 
##     1     2     3     4     5     6     7     8     9    10    11    12 
## 0.856 0.682 0.537 0.446 0.366 0.305 0.261 0.216 0.205 0.201 0.173 0.151 
##    13    14    15    16 
## 0.139 0.151 0.190 0.229
#Dickey-Fuller tests (Table 5.2)
#Note: Stata's default is with drift and no trend, zero lags. 
#You can adjust this if you feel the need, though.
tseries::adf.test(macro$macropart)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  macro$macropart
## Dickey-Fuller = -2.1515, Lag order = 5, p-value = 0.5131
## alternative hypothesis: stationary
aTSA::adf.test(macro$macropart) #null is nonstationary, so a significant result means stationary
## Augmented Dickey-Fuller Test 
## alternative: stationary 
##  
## Type 1: no drift no trend 
##      lag    ADF p.value
## [1,]   0 -0.368   0.538
## [2,]   1 -0.321   0.551
## [3,]   2 -0.271   0.566
## [4,]   3 -0.294   0.559
## [5,]   4 -0.296   0.558
## Type 2: with drift no trend 
##      lag   ADF p.value
## [1,]   0 -2.25   0.232
## [2,]   1 -1.92   0.358
## [3,]   2 -1.67   0.457
## [4,]   3 -1.89   0.369
## [5,]   4 -2.09   0.293
## Type 3: with drift and trend 
##      lag   ADF p.value
## [1,]   0 -2.49   0.369
## [2,]   1 -2.22   0.481
## [3,]   2 -2.03   0.562
## [4,]   3 -2.21   0.484
## [5,]   4 -2.39   0.412
## ---- 
## Note: in fact, p.value = 0.01 means p.value <= 0.01
aTSA::adf.test(macro$consumer)
## Augmented Dickey-Fuller Test 
## alternative: stationary 
##  
## Type 1: no drift no trend 
##      lag    ADF p.value
## [1,]   0 -0.636   0.451
## [2,]   1 -0.577   0.472
## [3,]   2 -0.516   0.494
## [4,]   3 -0.463   0.511
## [5,]   4 -0.481   0.505
## Type 2: with drift no trend 
##      lag   ADF p.value
## [1,]   0 -2.25  0.2329
## [2,]   1 -2.45  0.1519
## [3,]   2 -2.47  0.1452
## [4,]   3 -2.69  0.0823
## [5,]   4 -2.59  0.0993
## Type 3: with drift and trend 
##      lag   ADF p.value
## [1,]   0 -2.60  0.3225
## [2,]   1 -2.93  0.1893
## [3,]   2 -3.04  0.1404
## [4,]   3 -3.41  0.0553
## [5,]   4 -3.30  0.0740
## ---- 
## Note: in fact, p.value = 0.01 means p.value <= 0.01
aTSA::adf.test(macro$papp)
## Augmented Dickey-Fuller Test 
## alternative: stationary 
##  
## Type 1: no drift no trend 
##      lag    ADF p.value
## [1,]   0 -1.033   0.308
## [2,]   1 -1.123   0.276
## [3,]   2 -1.075   0.294
## [4,]   3 -0.887   0.361
## [5,]   4 -0.983   0.327
## Type 2: with drift no trend 
##      lag   ADF p.value
## [1,]   0 -3.36  0.0156
## [2,]   1 -3.95  0.0100
## [3,]   2 -3.68  0.0100
## [4,]   3 -3.09  0.0317
## [5,]   4 -3.30  0.0183
## Type 3: with drift and trend 
##      lag   ADF p.value
## [1,]   0 -3.65  0.0307
## [2,]   1 -4.25  0.0100
## [3,]   2 -3.98  0.0120
## [4,]   3 -3.44  0.0504
## [5,]   4 -3.63  0.0330
## ---- 
## Note: in fact, p.value = 0.01 means p.value <= 0.01
#Kwiatkowski-Phillips-Schmidt-Shin (KPSS) test (Table 5.6)
aTSA::kpss.test(macro$macropart)
## KPSS Unit Root Test 
## alternative: nonstationary 
##  
## Type 1: no drift no trend 
##  lag  stat p.value
##    2 0.127     0.1
## ----- 
##  Type 2: with drift no trend 
##  lag  stat p.value
##    2 0.296     0.1
## ----- 
##  Type 1: with drift and trend 
##  lag  stat p.value
##    2 0.147  0.0488
## ----------- 
## Note: p.value = 0.01 means p.value <= 0.01 
##     : p.value = 0.10 means p.value >= 0.10
#null is stationary, so a significant result means nonstationary
tseries::kpss.test(macro$macropart) 
## Warning in tseries::kpss.test(macro$macropart): p-value smaller than
## printed p-value
## 
##  KPSS Test for Level Stationarity
## 
## data:  macro$macropart
## KPSS Level = 0.75467, Truncation lag parameter = 4, p-value = 0.01
tseries::kpss.test(macro$consumer)
## Warning in tseries::kpss.test(macro$consumer): p-value smaller than printed
## p-value
## 
##  KPSS Test for Level Stationarity
## 
## data:  macro$consumer
## KPSS Level = 1.4267, Truncation lag parameter = 4, p-value = 0.01
tseries::kpss.test(macro$papp)
## Warning in tseries::kpss.test(macro$papp): p-value smaller than printed p-
## value
## 
##  KPSS Test for Level Stationarity
## 
## data:  macro$papp
## KPSS Level = 0.93357, Truncation lag parameter = 4, p-value = 0.01
#ARIMA models for macropartisanship, Table 5.7
mod.ar1<-arima(macro$macropart,order=c(1,0,0));mod.ar1
## 
## Call:
## arima(x = macro$macropart, order = c(1, 0, 0))
## 
## Coefficients:
##          ar1  intercept
##       0.9286    60.8512
## s.e.  0.0279     1.7692
## 
## sigma^2 estimated as 2.946:  log likelihood = -314.46,  aic = 632.92
mod.i1<-arima(macro$macropart,order=c(0,1,0));mod.i1
## 
## Call:
## arima(x = macro$macropart, order = c(0, 1, 0))
## 
## 
## sigma^2 estimated as 3.06:  log likelihood = -314.52,  aic = 629.04
mod.ari11<-arima(macro$macropart,order=c(1,1,0));mod.ari11
## 
## Call:
## arima(x = macro$macropart, order = c(1, 1, 0))
## 
## Coefficients:
##           ar1
##       -0.1450
## s.e.   0.0787
## 
## sigma^2 estimated as 2.995:  log likelihood = -312.84,  aic = 627.68
#diagnose each
acf(mod.ar1$resid,16)

pacf(mod.ar1$resid,16)

Box.test(mod.ar1$resid,16,"Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  mod.ar1$resid
## X-squared = 28.493, df = 16, p-value = 0.02759
acf(mod.i1$resid,16)

pacf(mod.i1$resid,16)

Box.test(mod.i1$resid,16,"Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  mod.i1$resid
## X-squared = 31.155, df = 16, p-value = 0.01286
acf(mod.ari11$resid,16)

pacf(mod.ari11$resid,16)

Box.test(mod.ari11$resid,16,"Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  mod.ari11$resid
## X-squared = 28.947, df = 16, p-value = 0.0243
#Just for fun. An alternative estimator of the ARI(1,1) model.
library(dyn)
d.macropart<-ts(diff(macro$macropart))
macro$d.macropart<-c(NA,d.macropart)
head(macro)
##   macropart repub democrat   time qdate consumer  papp d.macropart
## 1      60.5 31.57    46.87 1953q1   -28    94.73 69.83          NA
## 2      59.1 31.38    47.02 1953q2   -27    91.60 73.67  -1.4000015
## 3      57.8 33.07    45.30 1953q3   -26    87.80 73.67  -1.2999992
## 4      59.2 31.42    45.25 1953q4   -25    84.50 63.83   1.4000015
## 5      59.4 30.07    44.08 1954q1   -24    85.23 68.67   0.2000008
## 6      59.7 31.63    43.88 1954q2   -23    86.20 63.50   0.2999992
ls.ari11<-dyn$lm(d.macropart~lag(d.macropart,-1));summary(ls.ari11)
## 
## Call:
## lm(formula = dyn(d.macropart ~ lag(d.macropart, -1)))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.2063 -0.9269  0.0049  1.2141  3.8354 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)  
## (Intercept)          -0.02479    0.13875  -0.179   0.8584  
## lag(d.macropart, -1) -0.14565    0.07936  -1.835   0.0684 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.744 on 156 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.02113,    Adjusted R-squared:  0.01486 
## F-statistic: 3.368 on 1 and 156 DF,  p-value: 0.06838

0.1.10 Error Correction Models

rm(list=ls())
library(tseries)

#set obs
n<-500

#generate two disturbances
delta<-rnorm(n)
epsilon<-rnorm(n)

#create two variables
x<-rep(NA,n)
y<-rep(NA,n)
x[1]<-0
y[1]<-0

#create two random walks
for(t in 2:n){
x[t]<-x[t-1]+delta[t]
}

for(t in 2:n){
y[t]<-y[t-1]+epsilon[t]
}

#Are x & y integrated series? Remember, non-significant means integrated.
adf.test(x, k=0)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  x
## Dickey-Fuller = -2.4998, Lag order = 0, p-value = 0.3668
## alternative hypothesis: stationary
adf.test(y, k=0)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  y
## Dickey-Fuller = -2.2377, Lag order = 0, p-value = 0.4777
## alternative hypothesis: stationary
#Are they cointegrated?
mod.1 <-lm(y~x)
adf.test(mod.1$residuals)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  mod.1$residuals
## Dickey-Fuller = -2.6999, Lag order = 7, p-value = 0.282
## alternative hypothesis: stationary
###################################################

rm(list=ls())
library(foreign)
library(timeSeries)
library(tseries)
library(dyn)
library(aTSA)
options(scipen=12)

#load data and create lags
ip<-read.dta("indipaki.dta")

#data cleaning
ip$d.pakds<-c(NA,diff(ip$pakds))
ip$d.indds<-c(NA,diff(ip$indds))
ip.1<-subset(ip,year<1991)

#plot the series
plot(y=ip.1$indds,x=ip.1$year,type='l',
     xlab="Year",ylab="Spending (Millions of U.S. Dollars)",axes=F)
lines(y=ip.1$pakds,x=ip.1$year,lty=2,col='blue')
axis(1);axis(2, at=c(0,2000000,4000000,6000000,8000000,10000000),
             labels=c(0,2000,4000,6000,8000,10000));box()
legend(x=1950,y=10000000,legend=c("India","Pakistan"),
       lty=c(1,2),col=c("black","blue"))

####SUPPOSE WE BELIEVE THEORETICALLY THAT PAKISTAN'S SPENDING SHAPES INDIA'S IN A ONE-WAY RELATIONSHIP###
#Dickey-Fuller tests
aTSA::adf.test(ip.1$pakds) #null is nonstationary, so a significant result means stationary
## Augmented Dickey-Fuller Test 
## alternative: stationary 
##  
## Type 1: no drift no trend 
##      lag  ADF p.value
## [1,]   0 3.35    0.99
## [2,]   1 3.59    0.99
## [3,]   2 3.37    0.99
## [4,]   3 2.89    0.99
## Type 2: with drift no trend 
##      lag  ADF p.value
## [1,]   0 1.79    0.99
## [2,]   1 2.21    0.99
## [3,]   2 2.25    0.99
## [4,]   3 2.07    0.99
## Type 3: with drift and trend 
##      lag    ADF p.value
## [1,]   0 -0.622   0.969
## [2,]   1 -0.464   0.979
## [3,]   2 -0.420   0.981
## [4,]   3 -0.458   0.979
## ---- 
## Note: in fact, p.value = 0.01 means p.value <= 0.01
aTSA::adf.test(ip.1$indds)
## Augmented Dickey-Fuller Test 
## alternative: stationary 
##  
## Type 1: no drift no trend 
##      lag  ADF p.value
## [1,]   0 3.97    0.99
## [2,]   1 3.70    0.99
## [3,]   2 4.99    0.99
## [4,]   3 3.34    0.99
## Type 2: with drift no trend 
##      lag  ADF p.value
## [1,]   0 2.27    0.99
## [2,]   1 2.40    0.99
## [3,]   2 3.73    0.99
## [4,]   3 2.69    0.99
## Type 3: with drift and trend 
##      lag    ADF p.value
## [1,]   0 -0.368   0.983
## [2,]   1 -0.341   0.984
## [3,]   2  0.763   0.990
## [4,]   3  0.725   0.990
## ---- 
## Note: in fact, p.value = 0.01 means p.value <= 0.01
#Step 1 of the Engle-Granger Two-Step estimation
equilibrium<-lm(indds~pakds,data=ip.1)
summary(equilibrium)
## 
## Call:
## lm(formula = indds ~ pakds, data = ip.1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2152591  -343570     -323   280093  1550765 
## 
## Coefficients:
##                Estimate  Std. Error t value Pr(>|t|)    
## (Intercept) -62240.4504 130591.2605  -0.477    0.636    
## pakds            3.4040      0.1084  31.390   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 591200 on 41 degrees of freedom
## Multiple R-squared:  0.9601, Adjusted R-squared:  0.9591 
## F-statistic: 985.3 on 1 and 41 DF,  p-value: < 0.00000000000000022
ip.1$z<-equilibrium$residuals
head(ip.1)
##   year  pakds  indds       res1     res1b       res2 d.pakds d.indds
## 1 1948 107414 275196 -132230.92 -285613.9 -22561.518      NA      NA
## 2 1949 187161 503918 -113920.27 -305309.3  -7322.844   79747  228722
## 3 1950 199398 366073  -64657.13 -481273.4  43791.703   12237 -137845
## 4 1951 244578 383874  -24258.60 -604211.3  83951.141   45180   17801
## 5 1952 282563 390843   11854.47 -715568.2 119970.617   37985    6969
## 6 1953 246233 403943  -27994.28 -589297.8  79945.922  -36330   13100
##            z
## 1  -28198.78
## 2  -70934.07
## 3 -250433.58
## 4 -386424.45
## 5 -508755.68
## 6 -371989.04
#Are the residuals stationary?
aTSA::adf.test(ip.1$z) #null is nonstationary, so a significant result means stationary
## Augmented Dickey-Fuller Test 
## alternative: stationary 
##  
## Type 1: no drift no trend 
##      lag   ADF p.value
## [1,]   0 -3.48  0.0100
## [2,]   1 -3.19  0.0100
## [3,]   2 -2.63  0.0101
## [4,]   3 -3.14  0.0100
## Type 2: with drift no trend 
##      lag   ADF p.value
## [1,]   0 -3.44  0.0180
## [2,]   1 -3.15  0.0341
## [3,]   2 -2.58  0.1134
## [4,]   3 -3.08  0.0398
## Type 3: with drift and trend 
##      lag   ADF p.value
## [1,]   0 -3.44  0.0643
## [2,]   1 -3.15  0.1188
## [3,]   2 -2.56  0.3436
## [4,]   3 -3.03  0.1643
## ---- 
## Note: in fact, p.value = 0.01 means p.value <= 0.01
plot(y=ip.1$z,x=ip.1$year,type='l')

acf(ip.1$z,15)

pacf(ip.1$z,15)

#Step 2, Version A: zero lag model testing for error correction only
ip.2<-ts(ip.1)
ind.0<-dyn$lm(d.indds~lag(z,-1),data=ip.2)
summary(ind.0)
## 
## Call:
## lm(formula = dyn(d.indds ~ lag(z, -1)), data = ip.2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -879370 -257132  -90469   87393 1514427 
## 
## Coefficients:
##                Estimate  Std. Error t value Pr(>|t|)   
## (Intercept) 232582.2303  72495.7990   3.208  0.00263 **
## lag(z, -1)      -0.2196      0.1244  -1.765  0.08523 . 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 469800 on 40 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.07224,    Adjusted R-squared:  0.04905 
## F-statistic: 3.115 on 1 and 40 DF,  p-value: 0.08523
#Step 2, Version B: allowing for error correction and Granger causation
ind.1<-dyn$lm(d.indds~lag(z,-1)+d.pakds,data=ip.2)
summary(ind.1)
## 
## Call:
## lm(formula = dyn(d.indds ~ lag(z, -1) + d.pakds), data = ip.2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -868554 -209884  -31850  135506 1185488 
## 
## Coefficients:
##                Estimate  Std. Error t value Pr(>|t|)   
## (Intercept) 132814.1455  71220.4673   1.865  0.06974 . 
## lag(z, -1)      -0.3238      0.1153  -2.809  0.00774 **
## d.pakds          1.4856      0.4434   3.350  0.00180 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 419300 on 39 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.2796, Adjusted R-squared:  0.2427 
## F-statistic: 7.568 on 2 and 39 DF,  p-value: 0.00167
#Engle-Granger Single Equation Estimation
ind.one.step<-dyn$lm(d.indds~lag(indds,-1)+lag(pakds,-1)+d.pakds,data=ip.2)
summary(ind.one.step)
## 
## Call:
## lm(formula = dyn(d.indds ~ lag(indds, -1) + lag(pakds, -1) + 
##     d.pakds), data = ip.2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -969368 -152029    4804  116469  989229 
## 
## Coefficients:
##                   Estimate  Std. Error t value Pr(>|t|)   
## (Intercept)    -12089.0072  91148.0767  -0.133  0.89519   
## lag(indds, -1)     -0.2981      0.1112  -2.682  0.01078 * 
## lag(pakds, -1)      1.1906      0.3785   3.146  0.00321 **
## d.pakds             1.2133      0.4441   2.732  0.00949 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 401800 on 38 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.3553, Adjusted R-squared:  0.3044 
## F-statistic: 6.981 on 3 and 38 DF,  p-value: 0.0007445
####CONSIDERING THE TWO-WAY APPROACH FROM THE TEXT####
#Step 1 of the Engle-Granger Two-Step estimation
equilibrium.2<-lm(pakds~indds,data=ip.1)
summary(equilibrium)
## 
## Call:
## lm(formula = indds ~ pakds, data = ip.1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2152591  -343570     -323   280093  1550765 
## 
## Coefficients:
##                Estimate  Std. Error t value Pr(>|t|)    
## (Intercept) -62240.4504 130591.2605  -0.477    0.636    
## pakds            3.4040      0.1084  31.390   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 591200 on 41 degrees of freedom
## Multiple R-squared:  0.9601, Adjusted R-squared:  0.9591 
## F-statistic: 985.3 on 1 and 41 DF,  p-value: < 0.00000000000000022
ip.1$z.2<-equilibrium.2$residuals
head(ip.1)
##   year  pakds  indds       res1     res1b       res2 d.pakds d.indds
## 1 1948 107414 275196 -132230.92 -285613.9 -22561.518      NA      NA
## 2 1949 187161 503918 -113920.27 -305309.3  -7322.844   79747  228722
## 3 1950 199398 366073  -64657.13 -481273.4  43791.703   12237 -137845
## 4 1951 244578 383874  -24258.60 -604211.3  83951.141   45180   17801
## 5 1952 282563 390843   11854.47 -715568.2 119970.617   37985    6969
## 6 1953 246233 403943  -27994.28 -589297.8  79945.922  -36330   13100
##            z        z.2
## 1  -28198.78 -22561.518
## 2  -70934.07  -7322.844
## 3 -250433.58  43791.702
## 4 -386424.45  83951.142
## 5 -508755.68 119970.618
## 6 -371989.04  79945.918
#Step 2, Version A: zero lag model testing for error correction only
ip.2<-ts(ip.1)
ind.0<-dyn$lm(d.indds~lag(z.2,-1),data=ip.2);summary(ind.0)
## 
## Call:
## lm(formula = dyn(d.indds ~ lag(z.2, -1)), data = ip.2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -859635 -251706 -110297  132744 1474713 
## 
## Coefficients:
##                 Estimate  Std. Error t value Pr(>|t|)   
## (Intercept)  234129.7881  70851.3041   3.305  0.00201 **
## lag(z.2, -1)      0.9549      0.4214   2.266  0.02894 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 459200 on 40 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.1138, Adjusted R-squared:  0.0916 
## F-statistic: 5.134 on 1 and 40 DF,  p-value: 0.02894
pak.0<-dyn$lm(d.pakds~lag(z.2,-1),data=ip.2);summary(pak.0)
## 
## Call:
## lm(formula = dyn(d.pakds ~ lag(z.2, -1)), data = ip.2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -311834  -77345  -12809   55785  520417 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  66686.7339 23405.5185   2.849  0.00689 **
## lag(z.2, -1)    -0.1925     0.1392  -1.383  0.17441   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 151700 on 40 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.04562,    Adjusted R-squared:  0.02176 
## F-statistic: 1.912 on 1 and 40 DF,  p-value: 0.1744
#Step 2, Version B: allowing for error correction and Granger causation
ind.3<-dyn$lm(d.indds~lag(d.indds,-1)+lag(d.indds,-2)+
                lag(d.indds,-3)+lag(d.pakds,-1)+lag(d.pakds,-2)+
                lag(d.pakds,-3)+lag(z.2,-1),data=ip.2);summary(ind.3)
## 
## Call:
## lm(formula = dyn(d.indds ~ lag(d.indds, -1) + lag(d.indds, -2) + 
##     lag(d.indds, -3) + lag(d.pakds, -1) + lag(d.pakds, -2) + 
##     lag(d.pakds, -3) + lag(z.2, -1)), data = ip.2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -839830 -215755   20116  242414  694300 
## 
## Coefficients:
##                      Estimate   Std. Error t value Pr(>|t|)    
## (Intercept)      182495.24228  83729.27663   2.180 0.037006 *  
## lag(d.indds, -1)      0.41021      0.17904   2.291 0.028912 *  
## lag(d.indds, -2)     -0.06479      0.16694  -0.388 0.700594    
## lag(d.indds, -3)      0.70627      0.16550   4.268 0.000173 ***
## lag(d.pakds, -1)     -0.48663      0.58371  -0.834 0.410842    
## lag(d.pakds, -2)     -0.35497      0.57279  -0.620 0.539969    
## lag(d.pakds, -3)     -2.01313      0.54257  -3.710 0.000811 ***
## lag(z.2, -1)          1.58074      0.52104   3.034 0.004854 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 386800 on 31 degrees of freedom
##   (7 observations deleted due to missingness)
## Multiple R-squared:  0.5024, Adjusted R-squared:   0.39 
## F-statistic: 4.471 on 7 and 31 DF,  p-value: 0.001547
pak.3<-dyn$lm(d.pakds~lag(d.indds,-1)+lag(d.indds,-2)+
                lag(d.indds,-3)+lag(d.pakds,-1)+lag(d.pakds,-2)+
                lag(d.pakds,-3)+lag(z.2,-1),data=ip.2);summary(pak.3)
## 
## Call:
## lm(formula = dyn(d.pakds ~ lag(d.indds, -1) + lag(d.indds, -2) + 
##     lag(d.indds, -3) + lag(d.pakds, -1) + lag(d.pakds, -2) + 
##     lag(d.pakds, -3) + lag(z.2, -1)), data = ip.2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -310088  -47086  -19697   40008  480445 
## 
## Coefficients:
##                     Estimate  Std. Error t value Pr(>|t|)   
## (Intercept)      31854.27416 32052.04119   0.994  0.32800   
## lag(d.indds, -1)     0.01674     0.06854   0.244  0.80867   
## lag(d.indds, -2)    -0.02616     0.06390  -0.409  0.68503   
## lag(d.indds, -3)     0.18314     0.06335   2.891  0.00696 **
## lag(d.pakds, -1)    -0.04804     0.22345  -0.215  0.83116   
## lag(d.pakds, -2)     0.03401     0.21927   0.155  0.87775   
## lag(d.pakds, -3)    -0.06604     0.20770  -0.318  0.75266   
## lag(z.2, -1)        -0.15418     0.19946  -0.773  0.44539   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 148100 on 31 degrees of freedom
##   (7 observations deleted due to missingness)
## Multiple R-squared:  0.2924, Adjusted R-squared:  0.1326 
## F-statistic:  1.83 on 7 and 31 DF,  p-value: 0.1166
#Granger Causality tests
ind.subset<-dyn$lm(d.indds~lag(d.indds,-1)+lag(d.indds,-2)+
                     lag(d.indds,-3)+lag(z.2,-1),data=ip.2)
summary(ind.subset)
## 
## Call:
## lm(formula = dyn(d.indds ~ lag(d.indds, -1) + lag(d.indds, -2) + 
##     lag(d.indds, -3) + lag(z.2, -1)), data = ip.2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -893993 -192768  -39498  196995 1319706 
## 
## Coefficients:
##                     Estimate  Std. Error t value Pr(>|t|)  
## (Intercept)      152072.5982  94974.2444   1.601   0.1186  
## lag(d.indds, -1)      0.1265      0.1570   0.806   0.4258  
## lag(d.indds, -2)     -0.1300      0.1639  -0.793   0.4332  
## lag(d.indds, -3)      0.4166      0.1626   2.562   0.0150 *
## lag(z.2, -1)          1.0106      0.4216   2.397   0.0222 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 447700 on 34 degrees of freedom
##   (7 observations deleted due to missingness)
## Multiple R-squared:  0.2691, Adjusted R-squared:  0.1831 
## F-statistic: 3.129 on 4 and 34 DF,  p-value: 0.02703
pak.subset<-dyn$lm(d.pakds~lag(d.pakds,-1)+lag(d.pakds,-2)+
                     lag(d.pakds,-3)+lag(z.2,-1),data=ip.2)
summary(pak.subset)
## 
## Call:
## lm(formula = dyn(d.pakds ~ lag(d.pakds, -1) + lag(d.pakds, -2) + 
##     lag(d.pakds, -3) + lag(z.2, -1)), data = ip.2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -336515  -67738  -20623   46143  465939 
## 
## Coefficients:
##                      Estimate   Std. Error t value Pr(>|t|)
## (Intercept)      51730.095149 33456.928080   1.546    0.131
## lag(d.pakds, -1)     0.006645     0.194366   0.034    0.973
## lag(d.pakds, -2)     0.106523     0.182850   0.583    0.564
## lag(d.pakds, -3)     0.159098     0.182964   0.870    0.391
## lag(z.2, -1)        -0.224621     0.170159  -1.320    0.196
## 
## Residual standard error: 161900 on 34 degrees of freedom
##   (7 observations deleted due to missingness)
## Multiple R-squared:  0.07282,    Adjusted R-squared:  -0.03626 
## F-statistic: 0.6676 on 4 and 34 DF,  p-value: 0.6189
anova(ind.subset,ind.3)
## Analysis of Variance Table
## 
## Model 1: d.indds ~ lag(d.indds, -1) + lag(d.indds, -2) + lag(d.indds, 
##     -3) + lag(z.2, -1)
## Model 2: d.indds ~ lag(d.indds, -1) + lag(d.indds, -2) + lag(d.indds, 
##     -3) + lag(d.pakds, -1) + lag(d.pakds, -2) + lag(d.pakds, 
##     -3) + lag(z.2, -1)
##   Res.Df           RSS Df     Sum of Sq      F   Pr(>F)   
## 1     34 6813459188945                                    
## 2     31 4638544609431  3 2174914579514 4.8451 0.007038 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(pak.subset,pak.3)
## Analysis of Variance Table
## 
## Model 1: d.pakds ~ lag(d.pakds, -1) + lag(d.pakds, -2) + lag(d.pakds, 
##     -3) + lag(z.2, -1)
## Model 2: d.pakds ~ lag(d.indds, -1) + lag(d.indds, -2) + lag(d.indds, 
##     -3) + lag(d.pakds, -1) + lag(d.pakds, -2) + lag(d.pakds, 
##     -3) + lag(z.2, -1)
##   Res.Df          RSS Df    Sum of Sq      F  Pr(>F)  
## 1     34 890666905515                                 
## 2     31 679733139150  3 210933766366 3.2066 0.03659 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
######################################################################

0.1.11 Heteroscedasticity in Time Series

#front matter
rm(list=ls())
#install.packages("fGarch")
#install.packages("tseries")
library(fGarch)
## Loading required package: fBasics
## 
## Attaching package: 'fBasics'
## The following object is masked from 'package:car':
## 
##     densityPlot
library(tseries)

#Monte Carlo example: generate and fit an ARCH(2) model
n <- 1100
a <- c(0.1, 0.5, 0.2)  # ARCH(2) coefficients
e <- rnorm(n)  
x <- double(n)
x[1:2] <- rnorm(2, sd = sqrt(a[1]/(1.0-a[2]-a[3]))) 
for(i in 3:n)  # Generate ARCH(2) process
{
  x[i] <- e[i]*sqrt(a[1]+a[2]*x[i-1]^2+a[3]*x[i-2]^2)
}
x <- ts(x[101:1100])
x.arch <- garchFit(formula=~garch(2,0), 
                   include.mean=FALSE, 
                   data=x, trace=FALSE)  # Fit ARCH(2)
### omega is the constant in "h", alpha refers to MA terms, beta refers to AR terms
### NOTE DIFFERENCES FROM "garch" IN "tseries" ###
summary(x.arch)          
## 
## Title:
##  GARCH Modelling 
## 
## Call:
##  garchFit(formula = ~garch(2, 0), data = x, include.mean = FALSE, 
##     trace = FALSE) 
## 
## Mean and Variance Equation:
##  data ~ garch(2, 0)
## <environment: 0x7fb3a8097fd0>
##  [data = x]
## 
## Conditional Distribution:
##  norm 
## 
## Coefficient(s):
##   omega   alpha1   alpha2  
## 0.10373  0.50385  0.21025  
## 
## Std. Errors:
##  based on Hessian 
## 
## Error Analysis:
##         Estimate  Std. Error  t value            Pr(>|t|)    
## omega    0.10373     0.01003   10.337             < 2e-16 ***
## alpha1   0.50385     0.06440    7.824 0.00000000000000511 ***
## alpha2   0.21025     0.04748    4.428 0.00000950189249793 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Log Likelihood:
##  -713.0747    normalized:  -0.7130747 
## 
## Description:
##  Fri Jan  4 21:09:14 2019 by user:  
## 
## 
## Standardised Residuals Tests:
##                                 Statistic p-Value  
##  Jarque-Bera Test   R    Chi^2  1.903581  0.3860491
##  Shapiro-Wilk Test  R    W      0.9977182 0.183673 
##  Ljung-Box Test     R    Q(10)  8.357049  0.5940044
##  Ljung-Box Test     R    Q(15)  12.57897  0.634782 
##  Ljung-Box Test     R    Q(20)  14.31629  0.8141042
##  Ljung-Box Test     R^2  Q(10)  11.40563  0.3268006
##  Ljung-Box Test     R^2  Q(15)  16.7809   0.3321302
##  Ljung-Box Test     R^2  Q(20)  20.47191  0.4287787
##  LM Arch Test       R    TR^2   10.24339  0.5946172
## 
## Information Criterion Statistics:
##      AIC      BIC      SIC     HQIC 
## 1.432149 1.446873 1.432131 1.437745
#plot(x.arch)                        
#post-diagnosis: plot 11 (ACF of squared residuals) is a good choice to see if we filtered heteroscedasticity


#####Working with real data on EU stock markets#####
data(EuStockMarkets)  

###Start with diagnosis###
plot(EuStockMarkets[,"DAX"], type='l')

#more variable at higher values, but first trending
plot(diff(EuStockMarkets)[,"DAX"], type='l')

#still more variable at higher values, so take the difference of the log
plot(diff(log(EuStockMarkets))[,"DAX"], type='l')

#still heteroscedastic

#sizing-up potential heteroscedasticity
squares<-diff(log(EuStockMarkets))[,"DAX"]^2

#ACF/PACF/Box-Test--They're valid for variances too!
acf(squares, 20)

pacf(squares, 20)

Box.test(squares, 20, 'Ljung-Box')
## 
##  Box-Ljung test
## 
## data:  squares
## X-squared = 137.24, df = 20, p-value < 0.00000000000000022
#Here's a model with no ARMA process, and ARCH(2)
dax.garch.1 <- garchFit(formula=~garch(2,0), 
                        data=diff(log(EuStockMarkets))[,"DAX"], 
                        trace=FALSE)  
summary(dax.garch.1)
## 
## Title:
##  GARCH Modelling 
## 
## Call:
##  garchFit(formula = ~garch(2, 0), data = diff(log(EuStockMarkets))[, 
##     "DAX"], trace = FALSE) 
## 
## Mean and Variance Equation:
##  data ~ garch(2, 0)
## <environment: 0x7fb38e30a4e8>
##  [data = diff(log(EuStockMarkets))[, "DAX"]]
## 
## Conditional Distribution:
##  norm 
## 
## Coefficient(s):
##         mu       omega      alpha1      alpha2  
## 0.00067795  0.00008684  0.08637820  0.09013990  
## 
## Std. Errors:
##  based on Hessian 
## 
## Error Analysis:
##           Estimate  Std. Error  t value Pr(>|t|)    
## mu     0.000677948 0.000231896    2.924 0.003461 ** 
## omega  0.000086840 0.000003891   22.316  < 2e-16 ***
## alpha1 0.086378202 0.024305122    3.554 0.000380 ***
## alpha2 0.090139903 0.026104314    3.453 0.000554 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Log Likelihood:
##  5900.61    normalized:  3.174077 
## 
## Description:
##  Fri Jan  4 21:09:14 2019 by user:  
## 
## 
## Standardised Residuals Tests:
##                                 Statistic p-Value  
##  Jarque-Bera Test   R    Chi^2  4780.743  0        
##  Shapiro-Wilk Test  R    W      0.9533134 0        
##  Ljung-Box Test     R    Q(10)  5.018164  0.8899613
##  Ljung-Box Test     R    Q(15)  13.69537  0.5487467
##  Ljung-Box Test     R    Q(20)  18.04834  0.5842234
##  Ljung-Box Test     R^2  Q(10)  14.8518   0.137559 
##  Ljung-Box Test     R^2  Q(15)  22.74342  0.0897415
##  Ljung-Box Test     R^2  Q(20)  25.3464   0.1884986
##  LM Arch Test       R    TR^2   14.05836  0.2969968
## 
## Information Criterion Statistics:
##       AIC       BIC       SIC      HQIC 
## -6.343851 -6.331957 -6.343861 -6.339468
#plot(dax.garch.1)
#It's a winner!

#Here's a model with no ARMA process, and GARCH(1,1)
dax.garch.2 <- garchFit(formula=~garch(1,1), 
                        data=diff(log(EuStockMarkets))[,"DAX"], 
                        trace=FALSE)  
summary(dax.garch.2)  
## 
## Title:
##  GARCH Modelling 
## 
## Call:
##  garchFit(formula = ~garch(1, 1), data = diff(log(EuStockMarkets))[, 
##     "DAX"], trace = FALSE) 
## 
## Mean and Variance Equation:
##  data ~ garch(1, 1)
## <environment: 0x7fb3a0b61420>
##  [data = diff(log(EuStockMarkets))[, "DAX"]]
## 
## Conditional Distribution:
##  norm 
## 
## Coefficient(s):
##           mu         omega        alpha1         beta1  
## 0.0006535079  0.0000047543  0.0684165305  0.8876111521  
## 
## Std. Errors:
##  based on Hessian 
## 
## Error Analysis:
##           Estimate  Std. Error  t value   Pr(>|t|)    
## mu     0.000653508 0.000215758    3.029    0.00245 ** 
## omega  0.000004754 0.000001264    3.760    0.00017 ***
## alpha1 0.068416531 0.014777055    4.630 0.00000366 ***
## beta1  0.887611152 0.023558551   37.677    < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Log Likelihood:
##  5966.214    normalized:  3.209368 
## 
## Description:
##  Fri Jan  4 21:09:14 2019 by user:  
## 
## 
## Standardised Residuals Tests:
##                                 Statistic p-Value  
##  Jarque-Bera Test   R    Chi^2  13380.69  0        
##  Shapiro-Wilk Test  R    W      0.9477474 0        
##  Ljung-Box Test     R    Q(10)  3.195816  0.9764329
##  Ljung-Box Test     R    Q(15)  10.13427  0.8112099
##  Ljung-Box Test     R    Q(20)  12.80196  0.8857182
##  Ljung-Box Test     R^2  Q(10)  0.8932651 0.9998977
##  Ljung-Box Test     R^2  Q(15)  1.329651  0.9999981
##  Ljung-Box Test     R^2  Q(20)  1.756904  1        
##  LM Arch Test       R    TR^2   1.08588   0.9999776
## 
## Information Criterion Statistics:
##       AIC       BIC       SIC      HQIC 
## -6.414432 -6.402538 -6.414441 -6.410049
#plot(dax.garch.2)     
#This one rocks our world more.

#Here's a model with an ARMA(1,1) process, and GARCH(1,1)
dax.garch.3 <- garchFit(formula=~arma(1,1)+garch(1,1),
                        data=diff(log(EuStockMarkets))[,"DAX"], 
                        trace=FALSE)  
## Warning in arima(.series$x, order = c(u, 0, v), include.mean =
## include.mean): possible convergence problem: optim gave code = 1
summary(dax.garch.3)  
## 
## Title:
##  GARCH Modelling 
## 
## Call:
##  garchFit(formula = ~arma(1, 1) + garch(1, 1), data = diff(log(EuStockMarkets))[, 
##     "DAX"], trace = FALSE) 
## 
## Mean and Variance Equation:
##  data ~ arma(1, 1) + garch(1, 1)
## <environment: 0x7fb3a902c920>
##  [data = diff(log(EuStockMarkets))[, "DAX"]]
## 
## Conditional Distribution:
##  norm 
## 
## Coefficient(s):
##            mu            ar1            ma1          omega         alpha1  
##  0.0006113149   0.0721959825  -0.0568881669   0.0000049191   0.0705854371  
##         beta1  
##  0.8840313184  
## 
## Std. Errors:
##  based on Hessian 
## 
## Error Analysis:
##            Estimate   Std. Error  t value   Pr(>|t|)    
## mu      0.000611315  0.000455023    1.343      0.179    
## ar1     0.072195983  0.622786398    0.116      0.908    
## ma1    -0.056888167  0.633345760   -0.090      0.928    
## omega   0.000004919  0.000001216    4.044 0.00005256 ***
## alpha1  0.070585437  0.014465626    4.880 0.00000106 ***
## beta1   0.884031318  0.022523290   39.250    < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Log Likelihood:
##  5966.945    normalized:  3.209761 
## 
## Description:
##  Fri Jan  4 21:09:14 2019 by user:  
## 
## 
## Standardised Residuals Tests:
##                                 Statistic p-Value  
##  Jarque-Bera Test   R    Chi^2  13311.37  0        
##  Shapiro-Wilk Test  R    W      0.9480408 0        
##  Ljung-Box Test     R    Q(10)  2.928107  0.9830656
##  Ljung-Box Test     R    Q(15)  9.632712  0.8421855
##  Ljung-Box Test     R    Q(20)  12.39132  0.9019491
##  Ljung-Box Test     R^2  Q(10)  0.893068  0.9998978
##  Ljung-Box Test     R^2  Q(15)  1.332901  0.9999981
##  Ljung-Box Test     R^2  Q(20)  1.756903  1        
##  LM Arch Test       R    TR^2   1.089638  0.9999772
## 
## Information Criterion Statistics:
##       AIC       BIC       SIC      HQIC 
## -6.413066 -6.395225 -6.413087 -6.406491
#plot(dax.garch.3)     
#AR and MA are superfluous.

#Make a plot selection (or 0 to exit): 

 #1:   Time Series
# 2:   Conditional SD
# 3:   Series with 2 Conditional SD Superimposed
# 4:   ACF of Observations
# 5:   ACF of Squared Observations
# 6:   Cross Correlation
# 7:   Residuals
# 8:   Conditional SDs
# 9:   Standardized Residuals
#10:   ACF of Standardized Residuals
#11:   ACF of Squared Standardized Residuals
#12:   Cross Correlation between r^2 and r
#13:   QQ-Plot of Standardized Residuals

0.2 Panel data analysis

0.2.1 Visualizing Longitudinal and Clustered Data

#https://spia.uga.edu/faculty_pages/monogan/teaching/pd/
# cleanup
rm(list=ls())
library(lattice)

# load data
divorce <- read.table("Divorce.txt", sep ="\t", quote = "",header=TRUE)
#divorce<-read.table("//spia.uga.edu/faculty_pages/monogan/teaching/pd/Divorce.txt", 
#                    sep ="\t", quote = "",header=TRUE)

# Create a new variable for year
divorce$YEAR=divorce$TIME*10+1955

#quick overview of data
head(divorce)
##   DIVORCE BIRTH MARRIAGE UNEMPLOY  CRIME AFDC STATE TIME    STATE.Name
## 1     2.6  19.9      8.8      4.9  6.799  114     1    1         Maine
## 2     2.3  19.5     13.4      2.8  6.106  188     2    1 New Hampshire
## 3     1.5  20.5      9.0      4.2  5.793  113     3    1       Vermont
## 4     1.5  18.8      7.1      4.9 15.072  188     4    1 Massachusetts
## 5     1.3  19.4      7.1      4.9 14.180  172     5    1  Rhode Island
## 6     1.3  19.2      7.4      3.9 11.749  197     6    1   Connecticut
##        Region YEAR
## 1 New England 1965
## 2 New England 1965
## 3 New England 1965
## 4 New England 1965
## 5 New England 1965
## 6 New England 1965
summary(divorce)
##     DIVORCE          BIRTH          MARRIAGE         UNEMPLOY     
##  Min.   :0.500   Min.   :11.20   Min.   : 6.100   Min.   : 2.300  
##  1st Qu.:3.300   1st Qu.:14.30   1st Qu.: 7.975   1st Qu.: 4.700  
##  Median :4.250   Median :15.70   Median : 9.200   Median : 5.900  
##  Mean   :4.361   Mean   :16.38   Mean   :10.175   Mean   : 6.339  
##  3rd Qu.:5.300   3rd Qu.:18.50   3rd Qu.:11.000   3rd Qu.: 8.000  
##  Max.   :9.100   Max.   :27.90   Max.   :88.100   Max.   :13.000  
##  NA's   :12      NA's   :3       NA's   :4        NA's   :3       
##      CRIME             AFDC           STATE         TIME     
##  Min.   : 2.458   Min.   : 33.0   Min.   : 1   Min.   :1.00  
##  1st Qu.: 6.575   1st Qu.:154.0   1st Qu.:13   1st Qu.:1.75  
##  Median :22.950   Median :224.0   Median :26   Median :2.50  
##  Mean   :28.779   Mean   :245.9   Mean   :26   Mean   :2.50  
##  3rd Qu.:47.095   3rd Qu.:315.0   3rd Qu.:39   3rd Qu.:3.25  
##  Max.   :83.421   Max.   :731.0   Max.   :51   Max.   :4.00  
##  NA's   :4        NA's   :3                                  
##       STATE.Name                 Region        YEAR     
##  Alabama   :  4   South Atlantic    :36   Min.   :1965  
##  Alaska    :  4   Mountain          :32   1st Qu.:1972  
##  Arizona   :  4   West North Central:28   Median :1980  
##  Arkansas  :  4   New England       :24   Mean   :1980  
##  California:  4   East North Central:20   3rd Qu.:1988  
##  Colorado  :  4   Pacific           :20   Max.   :1995  
##  (Other)   :180   (Other)           :44
# Individual-Level Time Plot Using "lattice" Graphics
#trellis.device("png",color=FALSE,file="stateDivorce.png")
xyplot(DIVORCE~YEAR, data=divorce, type='l', 
       groups=STATE, xlab="Year", ylab="Divorce Rate")

#dev.off()

#regional subsets of individual time plot
xyplot(DIVORCE~YEAR, data=divorce, type='l', 
       groups=STATE, xlab="Year", ylab="Divorce Rate",subset=Region=="New England")

xyplot(DIVORCE~YEAR, data=divorce, type='l', 
       groups=STATE, xlab="Year", ylab="Divorce Rate",subset=Region=="South Atlantic")

# Time Plot of Means
divorce.2 <- na.omit(divorce)
div.mean <- by(divorce.2$DIVORCE, divorce.2$YEAR, mean)
years <- c(1965,1975,1985,1995)
plot(div.mean ~ years, type='o')

# Box Plot
boxplot(DIVORCE~YEAR, data=divorce)

#####ALTERNATE CODE#####
#  Individual-Level Time Plot Using "base" Graphics
plot(DIVORCE ~ YEAR, data = divorce)
   for (i in divorce$STATE) {
   lines(DIVORCE ~ YEAR, data = subset(divorce, STATE == i), col='gray60') }

#  Individual-Level Time Plot for New England v. South
par(mfrow=c(2,1))
plot(DIVORCE ~ YEAR, 
     data = subset(divorce, Region=="New England"), 
     main="New England")
   for (i in divorce$STATE) {
   lines(DIVORCE ~ YEAR, 
         data = subset(divorce, STATE == i & Region=="New England"), 
         col='gray60') 
     }

plot(DIVORCE ~ YEAR, 
     data = subset(divorce, Region=="South Atlantic"), 
     main="South Atlantic")
   for (i in divorce$STATE) {
   lines(DIVORCE ~ YEAR, 
         data = subset(divorce, STATE == i & Region=="South Atlantic"), 
         col='gray60') 
     }

#####################################################################

# Illustration on variance components

#set time
t<-c(1:4)

#deterministic trends, notice variation between individuals
trend.1<- 1+.5*t
trend.2<- 2+.25*t
plot(y=trend.1,x=t,type='l',ylim=c(0,5))
lines(y=trend.2,x=t,lty=2)

#plus natural variation
cycle.1<-trend.1+rnorm(4,sd=.1)
cycle.2<-trend.2+rnorm(4,sd=.1)
plot(y=cycle.1,x=t,type='l',ylim=c(0,5))
lines(y=cycle.2,x=t,lty=2)

#plus measurement error
observed.1<-cycle.1+rnorm(4,sd=.1)
observed.2<-cycle.2+rnorm(4,sd=.1)
plot(y=observed.1,x=t,type='l',ylim=c(0,5))
lines(y=observed.2,x=t,lty=2)

#all at once
plot(y=trend.1,x=t,type='l',ylim=c(0,5),xlab="x",ylab="y",col='red')
lines(y=trend.2,x=t,lty=2,col='blue')
points(y=cycle.1,x=t,pch=20,col='red')
points(y=cycle.2,x=t,pch=20,col='blue')
points(y=observed.1,x=t,col='red')
points(y=observed.2,x=t,col='blue')

#####################################################################

0.2.2 Reshaping Panel Data and Simple Pooled OLS Example

#clean up
rm(list=ls())

#required packages
library(foreign)

##SECTION 2.5: MERGING AND RESHAPING DATA##
#load 1994 and 1995 data in CSV format
#hmnrghts.94<-read.csv("http://j.mp/PTS1994")
#hmnrghts.95<-read.csv("http://j.mp/PTS1995")
hmnrghts.94<-read.csv("pts1994.csv")
hmnrghts.95<-read.csv("pts1995.csv")

#view the top of each data set
head(hmnrghts.94)
##              Country COWAlpha COW WorldBank Amnesty.1994 StateDept.1994
## 1      United States      USA   2       USA            1             NA
## 2             Canada      CAN  20       CAN            1              1
## 3            Bahamas      BHM  31       BHS            1              2
## 4               Cuba      CUB  40       CUB            3              3
## 5              Haiti      HAI  41       HTI            5              4
## 6 Dominican Republic      DOM  42       DOM            2              2
head(hmnrghts.95)
##              Country COWAlpha COW WorldBank Amnesty.1995 StateDept.1995
## 1      United States      USA   2       USA            1             NA
## 2             Canada      CAN  20       CAN           NA              1
## 3            Bahamas      BHM  31       BHS            1              1
## 4               Cuba      CUB  40       CUB            4              3
## 5              Haiti      HAI  41       HTI            2              3
## 6 Dominican Republic      DOM  42       DOM            2              2
#subset 1995 data to necessary variables only
hmnrghts.95<-subset(hmnrghts.95,select=c(COW,Amnesty.1995,StateDept.1995))

#merge 1994 and 1995 data
hmnrghts.wide<-merge(x=hmnrghts.94,y=hmnrghts.95,by=c("COW"))

#view merged data
head(hmnrghts.wide)
##   COW            Country COWAlpha WorldBank Amnesty.1994 StateDept.1994
## 1   2      United States      USA       USA            1             NA
## 2  20             Canada      CAN       CAN            1              1
## 3  31            Bahamas      BHM       BHS            1              2
## 4  40               Cuba      CUB       CUB            3              3
## 5  41              Haiti      HAI       HTI            5              4
## 6  42 Dominican Republic      DOM       DOM            2              2
##   Amnesty.1995 StateDept.1995
## 1            1             NA
## 2           NA              1
## 3            1              1
## 4            4              3
## 5            2              3
## 6            2              2
#number of observations and variables of 1994, 1995, and merged data
dim(hmnrghts.94); dim(hmnrghts.95); dim(hmnrghts.wide)
## [1] 179   6
## [1] 179   3
## [1] 179   8
#reshape merged data into long format
hmnrghts.long<-reshape(hmnrghts.wide,
                       varying=c("Amnesty.1994","StateDept.1994","Amnesty.1995","StateDept.1995"),
                       timevar="year",idvar="COW",direction="long",sep=".")

#view the top of the long data, then the first few 1995 observations
head(hmnrghts.long)
##         COW            Country COWAlpha WorldBank year Amnesty StateDept
## 2.1994    2      United States      USA       USA 1994       1        NA
## 20.1994  20             Canada      CAN       CAN 1994       1         1
## 31.1994  31            Bahamas      BHM       BHS 1994       1         2
## 40.1994  40               Cuba      CUB       CUB 1994       3         3
## 41.1994  41              Haiti      HAI       HTI 1994       5         4
## 42.1994  42 Dominican Republic      DOM       DOM 1994       2         2
head(hmnrghts.long[hmnrghts.long$year==1995,])
##         COW            Country COWAlpha WorldBank year Amnesty StateDept
## 2.1995    2      United States      USA       USA 1995       1        NA
## 20.1995  20             Canada      CAN       CAN 1995      NA         1
## 31.1995  31            Bahamas      BHM       BHS 1995       1         1
## 40.1995  40               Cuba      CUB       CUB 1995       4         3
## 41.1995  41              Haiti      HAI       HTI 1995       2         3
## 42.1995  42 Dominican Republic      DOM       DOM 1995       2         2
#reshape long data into wide form
hmnrghts.wide.2<-reshape(hmnrghts.long,
                      v.names=c("Amnesty","StateDept"),
                      timevar="year",idvar="COW",direction="wide",sep=".")

#view top of new wide data
head(hmnrghts.wide.2)
##         COW            Country COWAlpha WorldBank Amnesty.1994
## 2.1994    2      United States      USA       USA            1
## 20.1994  20             Canada      CAN       CAN            1
## 31.1994  31            Bahamas      BHM       BHS            1
## 40.1994  40               Cuba      CUB       CUB            3
## 41.1994  41              Haiti      HAI       HTI            5
## 42.1994  42 Dominican Republic      DOM       DOM            2
##         StateDept.1994 Amnesty.1995 StateDept.1995
## 2.1994              NA            1             NA
## 20.1994              1           NA              1
## 31.1994              2            1              1
## 40.1994              3            4              3
## 41.1994              4            2              3
## 42.1994              2            2              2
##MORE MERGING##
#more data
#hmnrghts.93 <- read.dta("http://j.mp/PTKstata")
hmnrghts.93 <- read.dta("hmnrghts.dta")

colnames(hmnrghts.93)<-c('country','polity.93','StateDept.1993',
                         'military.93','gnpcats.93','lpop.93',
                         'civ_war.93','int_war.93')

#new merge, and some problems
hmnrghts.wide$Country<-tolower(hmnrghts.wide$Country)
test.1<-merge(x=hmnrghts.wide,y=hmnrghts.93,by.x="Country",by.y="country",all.x=T)
test.2<-merge(x=hmnrghts.wide,y=hmnrghts.93,by.x="Country",by.y="country",all=T)
dim(hmnrghts.wide);dim(hmnrghts.93);dim(test.1);dim(test.2)
## [1] 179   8
## [1] 158   8
## [1] 179  15
## [1] 200  15
summary(test.1)
##    Country               COW           COWAlpha     WorldBank  
##  Length:179         Min.   :  2.0          :  2          :  3  
##  Class :character   1st Qu.:313.0   ISR    :  2   ISR*   :  2  
##  Mode  :character   Median :461.0   AFG    :  1   AFG    :  1  
##                     Mean   :463.7   ALB    :  1   AGO    :  1  
##                     3rd Qu.:666.0   ALG    :  1   ALB    :  1  
##                     Max.   :990.0   ANG    :  1   ARE    :  1  
##                                     (Other):171   (Other):170  
##   Amnesty.1994 StateDept.1994   Amnesty.1995   StateDept.1995 
##  Min.   :1.0   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:2.0   1st Qu.:1.000   1st Qu.:2.000   1st Qu.:1.000  
##  Median :3.0   Median :2.000   Median :2.000   Median :2.000  
##  Mean   :2.8   Mean   :2.574   Mean   :2.712   Mean   :2.438  
##  3rd Qu.:4.0   3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:3.000  
##  Max.   :5.0   Max.   :5.000   Max.   :5.000   Max.   :5.000  
##  NA's   :29    NA's   :3       NA's   :33      NA's   :3      
##    polity.93      StateDept.1993  military.93     gnpcats.93       
##  Min.   : 0.000   Min.   :1.00   Min.   :0.000   Length:179        
##  1st Qu.: 0.000   1st Qu.:1.00   1st Qu.:0.000   Class :character  
##  Median : 6.000   Median :2.00   Median :0.000   Mode  :character  
##  Mean   : 5.239   Mean   :2.46   Mean   :0.146                     
##  3rd Qu.:10.000   3rd Qu.:3.00   3rd Qu.:0.000                     
##  Max.   :10.000   Max.   :5.00   Max.   :1.000                     
##  NA's   :62       NA's   :42     NA's   :42                        
##     lpop.93        civ_war.93        int_war.93    
##  Min.   :11.29   Min.   :0.00000   Min.   :0.0000  
##  1st Qu.:14.60   1st Qu.:0.00000   1st Qu.:0.0000  
##  Median :15.86   Median :0.00000   Median :0.0000  
##  Mean   :15.63   Mean   :0.08029   Mean   :0.0292  
##  3rd Qu.:16.83   3rd Qu.:0.00000   3rd Qu.:0.0000  
##  Max.   :20.89   Max.   :1.00000   Max.   :1.0000  
##  NA's   :42      NA's   :42        NA's   :42
#Differences? Can they be fixed?

#reshape
hmnrghts.three.long<-reshape(test.1,
                      varying=c("StateDept.1993","StateDept.1994","StateDept.1995"),
                      timevar="year",idvar="COW",direction="long",sep=".")
head(hmnrghts.three.long)
##              Country COW COWAlpha WorldBank Amnesty.1994 Amnesty.1995
## 700.1993 afghanistan 700      AFG       AFG            5            5
## 339.1993     albania 339      ALB       ALB            3            3
## 615.1993     algeria 615      ALG       DZA            5            5
## 540.1993      angola 540      ANG       AGO            5            4
## 160.1993   argentina 160      ARG       ARG            2            2
## 371.1993     armenia 371      ARM       ARM            2            2
##          polity.93 military.93 gnpcats.93 lpop.93 civ_war.93 int_war.93
## 700.1993        NA          NA       <NA>      NA         NA         NA
## 339.1993         8           0      <1000   15.04          0          0
## 615.1993         0           0  1000-1999   17.12          0          0
## 540.1993         0           0      <1000   16.20          1          0
## 160.1993         8           0      >4000   17.33          0          0
## 371.1993        NA          NA       <NA>      NA         NA         NA
##          year StateDept
## 700.1993 1993        NA
## 339.1993 1993         2
## 615.1993 1993         5
## 540.1993 1993         5
## 160.1993 1993         2
## 371.1993 1993        NA
##QUICK DESCRIPTIVE STATS##
summary(hmnrghts.three.long)
##    Country               COW           COWAlpha     WorldBank  
##  Length:537         Min.   :  2.0          :  6          :  9  
##  Class :character   1st Qu.:310.0   ISR    :  6   ISR*   :  6  
##  Mode  :character   Median :461.0   AFG    :  3   AFG    :  3  
##                     Mean   :463.7   ALB    :  3   AGO    :  3  
##                     3rd Qu.:666.0   ALG    :  3   ALB    :  3  
##                     Max.   :990.0   ANG    :  3   ARE    :  3  
##                                     (Other):513   (Other):510  
##   Amnesty.1994  Amnesty.1995     polity.93       military.93   
##  Min.   :1.0   Min.   :1.000   Min.   : 0.000   Min.   :0.000  
##  1st Qu.:2.0   1st Qu.:2.000   1st Qu.: 0.000   1st Qu.:0.000  
##  Median :3.0   Median :2.000   Median : 6.000   Median :0.000  
##  Mean   :2.8   Mean   :2.712   Mean   : 5.239   Mean   :0.146  
##  3rd Qu.:4.0   3rd Qu.:4.000   3rd Qu.:10.000   3rd Qu.:0.000  
##  Max.   :5.0   Max.   :5.000   Max.   :10.000   Max.   :1.000  
##  NA's   :87    NA's   :99      NA's   :186      NA's   :126    
##   gnpcats.93           lpop.93        civ_war.93        int_war.93    
##  Length:537         Min.   :11.29   Min.   :0.00000   Min.   :0.0000  
##  Class :character   1st Qu.:14.60   1st Qu.:0.00000   1st Qu.:0.0000  
##  Mode  :character   Median :15.86   Median :0.00000   Median :0.0000  
##                     Mean   :15.63   Mean   :0.08029   Mean   :0.0292  
##                     3rd Qu.:16.83   3rd Qu.:0.00000   3rd Qu.:0.0000  
##                     Max.   :20.89   Max.   :1.00000   Max.   :1.0000  
##                     NA's   :126     NA's   :126       NA's   :126     
##       year        StateDept    
##  Min.   :1993   Min.   :1.000  
##  1st Qu.:1993   1st Qu.:1.000  
##  Median :1994   Median :2.000  
##  Mean   :1994   Mean   :2.493  
##  3rd Qu.:1995   3rd Qu.:3.000  
##  Max.   :1995   Max.   :5.000  
##                 NA's   :48
##BOXPLOTS##
par(mfrow=c(1,2))
boxplot(StateDept~year,data=hmnrghts.three.long,
        subset=polity.93>=6,ylim=c(1,5),main="More Democratic")
boxplot(StateDept~year,data=hmnrghts.three.long,
        subset=polity.93<6,ylim=c(1,5),main="Less Democratic")

#simple pooled model
hmnrghts.three.long$time<-hmnrghts.three.long$year-1993
hmnrghts.three.long$dummy<-as.numeric(hmnrghts.three.long$polity.93>=6)
pooled.mod<-lm(StateDept~time*dummy,data=hmnrghts.three.long)
summary(pooled.mod)
## 
## Call:
## lm(formula = StateDept ~ time * dummy, data = hmnrghts.three.long)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.2288 -1.0409 -0.0488  0.8889  2.9671 
## 
## Coefficients:
##             Estimate Std. Error t value      Pr(>|t|)    
## (Intercept)  3.22876    0.14418  22.394       < 2e-16 ***
## time        -0.05882    0.11168  -0.527         0.599    
## dummy       -1.19585    0.19207  -6.226 0.00000000139 ***
## time:dummy   0.06677    0.14894   0.448         0.654    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.128 on 345 degrees of freedom
##   (188 observations deleted due to missingness)
## Multiple R-squared:  0.2003, Adjusted R-squared:  0.1933 
## F-statistic:  28.8 on 3 and 345 DF,  p-value: < 0.00000000000000022
#####################################################################

0.2.3 Modeling the Mean: Response Profiles v. Parametric Curves

#Load libraries
rm(list=ls())
library(nlme)
library(car)


###DATA MANAGEMENT###
#load data
#tlc<-read.table(file.choose(), header=TRUE, sep="")
tlc<-read.table("tlc.txt", header=TRUE, sep="")

#reshape data
m.tlc<-reshape(tlc, varying=c("w0","w1","w4","w6"), idvar="id", timevar="week",direction="long",sep="")

#relevel treatment so that Placebo is the reference
m.tlc$treat<-relevel(m.tlc$treat,"P")

#rename our dependent variable of blood lead level from "w" to "value"
colnames(m.tlc)[4]<-"value"


###RESPONSE PROFILES###
#Just for the heck of it: OLS
ols.profiles<-lm(value~as.factor(treat)+as.factor(week)+
                   as.factor(treat)*as.factor(week), data=m.tlc)
summary(ols.profiles)
## 
## Call:
## lm(formula = value ~ as.factor(treat) + as.factor(week) + as.factor(treat) * 
##     as.factor(week), data = m.tlc)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -16.662  -4.620  -0.993   3.672  43.138 
## 
## Coefficients:
##                                    Estimate Std. Error t value
## (Intercept)                          26.272      0.937  28.038
## as.factor(treat)A                     0.268      1.325   0.202
## as.factor(week)1                     -1.612      1.325  -1.216
## as.factor(week)4                     -2.202      1.325  -1.662
## as.factor(week)6                     -2.626      1.325  -1.982
## as.factor(treat)A:as.factor(week)1  -11.406      1.874  -6.086
## as.factor(treat)A:as.factor(week)4   -8.824      1.874  -4.709
## as.factor(treat)A:as.factor(week)6   -3.152      1.874  -1.682
##                                         Pr(>|t|)    
## (Intercept)                              < 2e-16 ***
## as.factor(treat)A                         0.8398    
## as.factor(week)1                          0.2245    
## as.factor(week)4                          0.0974 .  
## as.factor(week)6                          0.0482 *  
## as.factor(treat)A:as.factor(week)1 0.00000000275 ***
## as.factor(treat)A:as.factor(week)4 0.00000346729 ***
## as.factor(treat)A:as.factor(week)6        0.0934 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.626 on 392 degrees of freedom
## Multiple R-squared:  0.3284, Adjusted R-squared:  0.3164 
## F-statistic: 27.38 on 7 and 392 DF,  p-value: < 0.00000000000000022
#Now the real model: GLS with REML (note: you need 'nlme' here)
# This function fits a linear model using generalized least squares. The errors are allowed to be correlated and/or have unequal variances.
gls.profiles<-nlme::gls(value~as.factor(treat)+as.factor(week)+
                    as.factor(treat)*as.factor(week), 
                  data=m.tlc, method="REML", 
                  correlation= corSymm(form=~1|id), 
                  na.action=na.omit)
#What do you make of the na.action?
#next week: more on corClasses
summary(gls.profiles)
## Generalized least squares fit by REML
##   Model: value ~ as.factor(treat) + as.factor(week) + as.factor(treat) *      as.factor(week) 
##   Data: m.tlc 
##        AIC      BIC    logLik
##   2471.632 2531.201 -1220.816
## 
## Correlation Structure: General
##  Formula: ~1 | id 
##  Parameter estimate(s):
##  Correlation: 
##   1     2     3    
## 2 0.596            
## 3 0.582 0.769      
## 4 0.536 0.552 0.551
## 
## Coefficients:
##                                      Value Std.Error   t-value p-value
## (Intercept)                         26.272 0.9374730 28.024273  0.0000
## as.factor(treat)A                    0.268 1.3257871  0.202144  0.8399
## as.factor(week)1                    -1.612 0.8425878 -1.913154  0.0565
## as.factor(week)4                    -2.202 0.8576242 -2.567558  0.0106
## as.factor(week)6                    -2.626 0.9034129 -2.906755  0.0039
## as.factor(treat)A:as.factor(week)1 -11.406 1.1915990 -9.572012  0.0000
## as.factor(treat)A:as.factor(week)4  -8.824 1.2128637 -7.275343  0.0000
## as.factor(treat)A:as.factor(week)6  -3.152 1.2776188 -2.467090  0.0140
## 
##  Correlation: 
##                                    (Intr) as.()A as.()1 as.()4 as.()6
## as.factor(treat)A                  -0.707                            
## as.factor(week)1                   -0.449  0.318                     
## as.factor(week)4                   -0.457  0.323  0.719              
## as.factor(week)6                   -0.482  0.341  0.485  0.492       
## as.factor(treat)A:as.factor(week)1  0.318 -0.449 -0.707 -0.508 -0.343
## as.factor(treat)A:as.factor(week)4  0.323 -0.457 -0.508 -0.707 -0.348
## as.factor(treat)A:as.factor(week)6  0.341 -0.482 -0.343 -0.348 -0.707
##                                    a.()A:.()1 a.()A:.()4
## as.factor(treat)A                                       
## as.factor(week)1                                        
## as.factor(week)4                                        
## as.factor(week)6                                        
## as.factor(treat)A:as.factor(week)1                      
## as.factor(treat)A:as.factor(week)4  0.719               
## as.factor(treat)A:as.factor(week)6  0.485      0.492    
## 
## Standardized residuals:
##        Min         Q1        Med         Q3        Max 
## -2.5135258 -0.6970199 -0.1497978  0.5540105  6.5075307 
## 
## Residual standard error: 6.628935 
## Degrees of freedom: 400 total; 392 residual
AIC(gls.profiles)
## [1] 2471.632
#Means of placebo and treated
placebo.mean <- by(m.tlc$value[m.tlc$treat=="P"], 
                   m.tlc$week[m.tlc$treat=="P"], 
                   mean, na.rm=T)
agent.mean <- by(m.tlc$value[m.tlc$treat=="A"], 
                 m.tlc$week[m.tlc$treat=="A"], 
                 mean)

#Plot Expectations
a<-gls.profiles$coefficients
time<-c(0,1,4,6)
placebo<-c(a[1],a[1]+a[3],a[1]+a[4],a[1]+a[5])
agent<-c(a[1]+a[2],a[1]+a[2]+a[3]+a[6],
         a[1]+a[2]+a[4]+a[7],a[1]+a[2]+a[5]+a[8])

plot(y=placebo,x=time,type='l',ylim=c(10,30))
lines(y=agent,x=time,lty=2)
points(y=placebo.mean,x=time)
points(y=agent.mean,x=time,pch=20)

#Do over-time response profiles differ by group?
#Wald test
rhs<-c(0,0,0)
hm<-rbind(
c(0,0,0,0,0,1,0,0),
c(0,0,0,0,0,0,1,0),
c(0,0,0,0,0,0,0,1)
)
linearHypothesis(gls.profiles,hm,rhs)
## Linear hypothesis test
## 
## Hypothesis:
## as.factor(treat)A:as.factor(week)1 = 0
## as.factor(treat)A:as.factor(week)4 = 0
## as.factor(treat)A:as.factor(week)6 = 0
## 
## Model 1: restricted model
## Model 2: value ~ as.factor(treat) + as.factor(week) + as.factor(treat) * 
##     as.factor(week)
## 
##   Df  Chisq            Pr(>Chisq)    
## 1                                    
## 2  3 99.249 < 0.00000000000000022 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#LR test (notice re-estimation with ML)
gls.ml<-gls(value~as.factor(treat)+as.factor(week)+
              as.factor(treat)*as.factor(week), 
            data=m.tlc, method="ML", 
            correlation= corSymm(form=~1|id))

no.inter<-gls(value~as.factor(treat)+as.factor(week), 
              data=m.tlc, method="ML", 
              correlation= corSymm(form=~1|id))

anova(gls.ml,no.inter)
##          Model df      AIC      BIC    logLik   Test  L.Ratio p-value
## gls.ml       1 15 2481.445 2541.317 -1225.722                        
## no.inter     2 12 2541.949 2589.847 -1258.975 1 vs 2 66.50439  <.0001
###PARAMETRIC CURVES###
#Linear Time Trend
linear<-gls(value~as.factor(treat)+week+
              as.factor(treat)*week, data=m.tlc, 
            method="REML", correlation= corSymm(form=~1|id))
summary(linear)
## Generalized least squares fit by REML
##   Model: value ~ as.factor(treat) + week + as.factor(treat) * week 
##   Data: m.tlc 
##       AIC      BIC   logLik
##   2595.58 2639.375 -1286.79
## 
## Correlation Structure: General
##  Formula: ~1 | id 
##  Parameter estimate(s):
##  Correlation: 
##   1      2      3     
## 2 -0.021              
## 3  0.194  0.779       
## 4  0.570  0.336  0.460
## 
## Coefficients:
##                            Value Std.Error  t-value p-value
## (Intercept)            25.632659 0.7438572 34.45911  0.0000
## as.factor(treat)A      -5.431447 1.0519730 -5.16310  0.0000
## week                   -0.344673 0.1220539 -2.82394  0.0050
## as.factor(treat)A:week  0.091068 0.1726103  0.52760  0.5981
## 
##  Correlation: 
##                        (Intr) as.()A week  
## as.factor(treat)A      -0.707              
## week                   -0.140  0.099       
## as.factor(treat)A:week  0.099 -0.140 -0.707
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -2.29776505 -0.65413448 -0.06926895  0.57725972  6.05949763 
## 
## Residual standard error: 7.462733 
## Degrees of freedom: 400 total; 396 residual
AIC(linear)
## [1] 2595.58
#Quadratic Time Trend
quadratic<-gls(value~as.factor(treat)+week+I(week^2)+
                 as.factor(treat)*week+as.factor(treat)*I(week^2), 
               data=m.tlc, method="REML", correlation= corSymm(form=~1|id))
summary(quadratic)
## Generalized least squares fit by REML
##   Model: value ~ as.factor(treat) + week + I(week^2) + as.factor(treat) *      week + as.factor(treat) * I(week^2) 
##   Data: m.tlc 
##        AIC      BIC    logLik
##   2562.444 2614.136 -1268.222
## 
## Correlation Structure: General
##  Formula: ~1 | id 
##  Parameter estimate(s):
##  Correlation: 
##   1     2     3    
## 2 0.236            
## 3 0.592 0.615      
## 4 0.427 0.529 0.526
## 
## Coefficients:
##                                 Value Std.Error  t-value p-value
## (Intercept)                 25.790988 0.7831345 32.93302  0.0000
## as.factor(treat)A           -3.332650 1.1075195 -3.00911  0.0028
## week                        -0.758501 0.5610096 -1.35203  0.1771
## I(week^2)                    0.070594 0.0965306  0.73131  0.4650
## as.factor(treat)A:week      -5.435095 0.7933873 -6.85049  0.0000
## as.factor(treat)A:I(week^2)  0.946460 0.1365149  6.93302  0.0000
## 
##  Correlation: 
##                             (Intr) as.()A week   I(w^2) as.()A:
## as.factor(treat)A           -0.707                             
## week                        -0.220  0.156                      
## I(week^2)                    0.171 -0.121 -0.978               
## as.factor(treat)A:week       0.156 -0.220 -0.707  0.691        
## as.factor(treat)A:I(week^2) -0.121  0.171  0.691 -0.707 -0.978 
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -2.59012968 -0.65648466 -0.09227529  0.59984631  6.10631369 
## 
## Residual standard error: 6.876374 
## Degrees of freedom: 400 total; 394 residual
AIC(quadratic)
## [1] 2562.444
#Spline with One Knot
m.tlc$w1<-pmax(m.tlc$week-1,0) #note: parallel maxima
spline<-gls(value~as.factor(treat)+week+w1+as.factor(treat)*week+
              as.factor(treat)*w1, data=m.tlc, method="REML", 
            correlation= corSymm(form=~1|id)) 
summary(spline) #we included the treatment main effect
## Generalized least squares fit by REML
##   Model: value ~ as.factor(treat) + week + w1 + as.factor(treat) * week +      as.factor(treat) * w1 
##   Data: m.tlc 
##        AIC      BIC    logLik
##   2488.123 2539.816 -1231.062
## 
## Correlation Structure: General
##  Formula: ~1 | id 
##  Parameter estimate(s):
##  Correlation: 
##   1     2     3    
## 2 0.600            
## 3 0.577 0.762      
## 4 0.532 0.549 0.528
## 
## Coefficients:
##                             Value Std.Error   t-value p-value
## (Intercept)             26.271418 0.9440823 27.827467  0.0000
## as.factor(treat)A        0.342922 1.3351340  0.256844  0.7974
## week                    -1.612527 0.8442870 -1.909927  0.0569
## w1                       1.411934 0.9335284  1.512470  0.1312
## as.factor(treat)A:week -11.338158 1.1940021 -9.495928  0.0000
## as.factor(treat)A:w1    12.704573 1.3202085  9.623156  0.0000
## 
##  Correlation: 
##                        (Intr) as.()A week   w1     as.()A:
## as.factor(treat)A      -0.707                             
## week                   -0.448  0.317                      
## w1                      0.393 -0.278 -0.989               
## as.factor(treat)A:week  0.317 -0.448 -0.707  0.699        
## as.factor(treat)A:w1   -0.278  0.393  0.699 -0.707 -0.989 
## 
## Standardized residuals:
##        Min         Q1        Med         Q3        Max 
## -2.3051791 -0.6828107 -0.1297304  0.5559840  6.6503051 
## 
## Residual standard error: 6.677473 
## Degrees of freedom: 400 total; 394 residual
AIC(spline)
## [1] 2488.123
#draw pictures and compare to repsonse profiles
time.ruler<-seq(0,6,by=.01)
spline.ruler<-pmax(time.ruler-1,0) 
newdata.P<-m.tlc[rep(1,601),]
newdata.P[,2]<-as.factor("P")
newdata.P[,3]<-time.ruler
newdata.P[,5]<-spline.ruler
newdata.A<-m.tlc[rep(1,601),]
newdata.A[,2]<-as.factor("A")
newdata.A[,3]<-time.ruler
newdata.A[,5]<-spline.ruler

placebo.linear<-predict(linear,newdata=newdata.P)
agent.linear<-predict(linear,newdata=newdata.A)
placebo.quadratic<-predict(quadratic,newdata=newdata.P)
agent.quadratic<-predict(quadratic,newdata=newdata.A)
placebo.spline<-predict(spline,newdata=newdata.P)
agent.spline<-predict(spline,newdata=newdata.A)

#plot all forms of our model in the same space (calls on objects created in response profile code)
#also try adding one model's lines at a time
plot(y=placebo.mean,x=time,type='p',ylim=c(10,30))
points(y=agent.mean,x=time,pch=20)
lines(y=placebo,x=time)
lines(y=agent,x=time,lty=2)
lines(y=placebo.linear,x=time.ruler,col='blue')
lines(y=agent.linear,x=time.ruler,col='blue',lty=2)
lines(y=placebo.quadratic,x=time.ruler,col='red')
lines(y=agent.quadratic,x=time.ruler,col='red',lty=2)
lines(y=placebo.spline,x=time.ruler,col='forestgreen')
lines(y=agent.spline,x=time.ruler,col='forestgreen',lty=2)

0.2.4 Modeling the Covariance and Analyzing Residuals

#clean up
rm(list=ls())

#Load libraries
library(nlme)
library(reshape)
## 
## Attaching package: 'reshape'
## The following object is masked from 'package:Matrix':
## 
##     expand
library(car)

###DATA MANAGEMENT###
#load data (missing is listed as ".")
#exercise<-read.table(file.choose(), header=TRUE, sep="")
exercise.0<-read.table("exercise.txt", header=TRUE, sep="", na.strings=".")

#listwise deletion (Gasp!)
exercise<-na.omit(exercise.0)

#reshape data
m.exercise<-melt.data.frame(data=exercise, 
                            measure.vars=c("d0","d2","d4","d6",
                                           "d8","d10","d12"), 
                            id=c("id","prog"))

m.exercise$value<-as.numeric(m.exercise$value)

#create variable for day
m.exercise$day<-as.numeric(substr(m.exercise$variable,2,3))

#Subset to make waves uneven for the purpose of illustration
data<-subset(m.exercise, day==0 | day==4 | day==6 | day==8 | day==12)

#relevel treatment so that Placebo is the reference
data$treat<-data$prog-1


###CHOOSE A COVARIANCE STRUCTURE###
##Unstructured##
unstructured<-gls(value~treat+as.factor(day)+treat*as.factor(day), 
                  data=data, method="REML", na.action=na.omit, 
                  correlation= corSymm(form=~1|id))
summary(unstructured)
## Generalized least squares fit by REML
##   Model: value ~ treat + as.factor(day) + treat * as.factor(day) 
##   Data: data 
##        AIC     BIC    logLik
##   407.6288 463.362 -182.8144
## 
## Correlation Structure: General
##  Formula: ~1 | id 
##  Parameter estimate(s):
##  Correlation: 
##   1     2     3     4    
## 2 0.940                  
## 3 0.917 0.958            
## 4 0.906 0.961 0.960      
## 5 0.872 0.914 0.933 0.959
## 
## Coefficients:
##                           Value Std.Error  t-value p-value
## (Intercept)            79.20000 1.0668462 74.23751  0.0000
## treat                   1.56923 1.4190381  1.10584  0.2713
## as.factor(day)4         1.40000 0.3704020  3.77968  0.0003
## as.factor(day)6         1.50000 0.4347482  3.45027  0.0008
## as.factor(day)8         2.30000 0.4628240  4.96949  0.0000
## as.factor(day)12        2.10000 0.5403588  3.88631  0.0002
## treat:as.factor(day)4  -0.47692 0.4926808 -0.96802  0.3353
## treat:as.factor(day)6   0.11538 0.5782692  0.19953  0.8422
## treat:as.factor(day)8  -1.06923 0.6156135 -1.73685  0.0853
## treat:as.factor(day)12 -0.17692 0.7187444 -0.24616  0.8060
## 
##  Correlation: 
##                        (Intr) treat  as.()4 as.()6 as.()8 a.()12 t:.()4
## treat                  -0.752                                          
## as.factor(day)4        -0.174  0.131                                   
## as.factor(day)6        -0.204  0.153  0.716                            
## as.factor(day)8        -0.217  0.163  0.763  0.774                     
## as.factor(day)12       -0.253  0.190  0.584  0.701  0.824              
## treat:as.factor(day)4   0.131 -0.174 -0.752 -0.538 -0.574 -0.439       
## treat:as.factor(day)6   0.153 -0.204 -0.538 -0.752 -0.582 -0.527  0.716
## treat:as.factor(day)8   0.163 -0.217 -0.574 -0.582 -0.752 -0.619  0.763
## treat:as.factor(day)12  0.190 -0.253 -0.439 -0.527 -0.619 -0.752  0.584
##                        t:.()6 t:.()8
## treat                               
## as.factor(day)4                     
## as.factor(day)6                     
## as.factor(day)8                     
## as.factor(day)12                    
## treat:as.factor(day)4               
## treat:as.factor(day)6               
## treat:as.factor(day)8   0.774       
## treat:as.factor(day)12  0.701  0.824
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -2.07489555 -0.69657208  0.06840315  0.68289145  1.68955780 
## 
## Residual standard error: 3.373664 
## Degrees of freedom: 115 total; 105 residual
AIC(unstructured)
## [1] 407.6288
##First-Order Autoregressive##
ar.1<-gls(value~treat+as.factor(day)+treat*as.factor(day), 
          data=data, method="REML", na.action=na.omit, 
          correlation= corAR1(form=~1|id))
summary(ar.1)
## Generalized least squares fit by REML
##   Model: value ~ treat + as.factor(day) + treat * as.factor(day) 
##   Data: data 
##        AIC      BIC    logLik
##   399.9096 431.7571 -187.9548
## 
## Correlation Structure: AR(1)
##  Formula: ~1 | id 
##  Parameter estimate(s):
##       Phi 
## 0.9546676 
## 
## Coefficients:
##                           Value Std.Error  t-value p-value
## (Intercept)            79.20000 1.0760813 73.60039  0.0000
## treat                   1.56923 1.4313219  1.09635  0.2754
## as.factor(day)4         1.40000 0.3240145  4.32079  0.0000
## as.factor(day)6         1.50000 0.4530028  3.31124  0.0013
## as.factor(day)8         2.30000 0.5485383  4.19296  0.0001
## as.factor(day)12        2.10000 0.6262902  3.35308  0.0011
## treat:as.factor(day)4  -0.47692 0.4309796 -1.10660  0.2710
## treat:as.factor(day)6   0.11538 0.6025501  0.19149  0.8485
## treat:as.factor(day)8  -1.06923 0.7296241 -1.46545  0.1458
## treat:as.factor(day)12 -0.17692 0.8330438 -0.21238  0.8322
## 
##  Correlation: 
##                        (Intr) treat  as.()4 as.()6 as.()8 a.()12 t:.()4
## treat                  -0.752                                          
## as.factor(day)4        -0.151  0.113                                   
## as.factor(day)6        -0.210  0.158  0.699                            
## as.factor(day)8        -0.255  0.192  0.565  0.807                     
## as.factor(day)12       -0.291  0.219  0.484  0.691  0.856              
## treat:as.factor(day)4   0.113 -0.151 -0.752 -0.526 -0.424 -0.364       
## treat:as.factor(day)6   0.158 -0.210 -0.526 -0.752 -0.607 -0.520  0.699
## treat:as.factor(day)8   0.192 -0.255 -0.424 -0.607 -0.752 -0.644  0.565
## treat:as.factor(day)12  0.219 -0.291 -0.364 -0.520 -0.644 -0.752  0.484
##                        t:.()6 t:.()8
## treat                               
## as.factor(day)4                     
## as.factor(day)6                     
## as.factor(day)8                     
## as.factor(day)12                    
## treat:as.factor(day)4               
## treat:as.factor(day)6               
## treat:as.factor(day)8   0.807       
## treat:as.factor(day)12  0.691  0.856
## 
## Standardized residuals:
##        Min         Q1        Med         Q3        Max 
## -2.0570885 -0.6905940  0.0678161  0.6770308  1.6750578 
## 
## Residual standard error: 3.402868 
## Degrees of freedom: 115 total; 105 residual
AIC(ar.1)
## [1] 399.9096
#What does the AR(1) look like?
p.1<-0.9546676

#remember: 0, 4, 6, 8, 12
row1 <-c(1,p.1,p.1^2,p.1^3,p.1^4)
row2 <-c(p.1,1,p.1,p.1^2,p.1^3)
row3 <-c(p.1^2,p.1,1,p.1,p.1^2)
row4 <-c(p.1^3,p.1^2,p.1,1,p.1)
row5 <-c(p.1^4,p.1^3,p.1^2,p.1,1)

ar1.cor<-rbind(row1,row2,row3,row4,row5); ar1.cor
##           [,1]      [,2]      [,3]      [,4]      [,5]
## row1 1.0000000 0.9546676 0.9113902 0.8700747 0.8306321
## row2 0.9546676 1.0000000 0.9546676 0.9113902 0.8700747
## row3 0.9113902 0.9546676 1.0000000 0.9546676 0.9113902
## row4 0.8700747 0.9113902 0.9546676 1.0000000 0.9546676
## row5 0.8306321 0.8700747 0.9113902 0.9546676 1.0000000
#Exponential##
exp.mod<-gls(value~treat+as.factor(day)+treat*as.factor(day), 
             data=data, method="REML", na.action=na.omit, 
             correlation= corCAR1(form=~day|id))
summary(exp.mod)
## Generalized least squares fit by REML
##   Model: value ~ treat + as.factor(day) + treat * as.factor(day) 
##   Data: data 
##        AIC      BIC    logLik
##   401.0567 432.9042 -188.5283
## 
## Correlation Structure: Continuous AR(1)
##  Formula: ~day | id 
##  Parameter estimate(s):
##       Phi 
## 0.9835166 
## 
## Coefficients:
##                           Value Std.Error  t-value p-value
## (Intercept)            79.20000 1.0777090 73.48923  0.0000
## treat                   1.56923 1.4334870  1.09469  0.2762
## as.factor(day)4         1.40000 0.3865395  3.62188  0.0005
## as.factor(day)6         1.50000 0.4695484  3.19456  0.0018
## as.factor(day)8         2.30000 0.5377873  4.27678  0.0000
## as.factor(day)12        2.10000 0.6480935  3.24027  0.0016
## treat:as.factor(day)4  -0.47692 0.5141456 -0.92760  0.3557
## treat:as.factor(day)6   0.11538 0.6245577  0.18475  0.8538
## treat:as.factor(day)8  -1.06923 0.7153239 -1.49475  0.1380
## treat:as.factor(day)12 -0.17692 0.8620450 -0.20524  0.8378
## 
##  Correlation: 
##                        (Intr) treat  as.()4 as.()6 as.()8 a.()12 t:.()4
## treat                  -0.752                                          
## as.factor(day)4        -0.179  0.135                                   
## as.factor(day)6        -0.218  0.164  0.810                            
## as.factor(day)8        -0.250  0.188  0.696  0.859                     
## as.factor(day)12       -0.301  0.226  0.559  0.690  0.803              
## treat:as.factor(day)4   0.135 -0.179 -0.752 -0.609 -0.523 -0.420       
## treat:as.factor(day)6   0.164 -0.218 -0.609 -0.752 -0.646 -0.519  0.810
## treat:as.factor(day)8   0.188 -0.250 -0.523 -0.646 -0.752 -0.604  0.696
## treat:as.factor(day)12  0.226 -0.301 -0.420 -0.519 -0.604 -0.752  0.559
##                        t:.()6 t:.()8
## treat                               
## as.factor(day)4                     
## as.factor(day)6                     
## as.factor(day)8                     
## as.factor(day)12                    
## treat:as.factor(day)4               
## treat:as.factor(day)6               
## treat:as.factor(day)8   0.859       
## treat:as.factor(day)12  0.690  0.803
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -2.05398155 -0.68955095  0.06771368  0.67600821  1.67252783 
## 
## Residual standard error: 3.408015 
## Degrees of freedom: 115 total; 105 residual
AIC(exp.mod)
## [1] 401.0567
#What does the exponential look like?
p<-0.9835166

#remember: 0, 4, 6, 8, 12
r1 <-c(1,p^4,p^6,p^8,p^12)
r2 <-c(p^4,1,p^2,p^4,p^8)
r3 <-c(p^6,p^2,1,p^2,p^6)
r4 <-c(p^8,p^4,p^2,1,p^4)
r5 <-c(p^12,p^8,p^6,p^4,1)

exp.cor<-rbind(r1,r2,r3,r4,r5); exp.cor
##         [,1]      [,2]      [,3]      [,4]      [,5]
## r1 1.0000000 0.9356788 0.9050867 0.8754948 0.8191819
## r2 0.9356788 1.0000000 0.9673049 0.9356788 0.8754948
## r3 0.9050867 0.9673049 1.0000000 0.9673049 0.9050867
## r4 0.8754948 0.9356788 0.9673049 1.0000000 0.9356788
## r5 0.8191819 0.8754948 0.9050867 0.9356788 1.0000000
#Toeplitz (back door)
ar.4<-gls(value~treat+as.factor(day)+treat*as.factor(day), 
          data=data, method="REML", na.action=na.omit, 
          correlation= corARMA(p=4, form=~1|id))
summary(ar.4)
## Generalized least squares fit by REML
##   Model: value ~ treat + as.factor(day) + treat * as.factor(day) 
##   Data: data 
##        AIC      BIC    logLik
##   399.9912 439.8006 -184.9956
## 
## Correlation Structure: ARMA(4,0)
##  Formula: ~1 | id 
##  Parameter estimate(s):
##        Phi1        Phi2        Phi3        Phi4 
##  0.70405177  0.35048067 -0.12343874  0.03547153 
## 
## Coefficients:
##                           Value Std.Error  t-value p-value
## (Intercept)            79.20000 1.0819234 73.20297  0.0000
## treat                   1.56923 1.4390926  1.09043  0.2780
## as.factor(day)4         1.40000 0.3242031  4.31828  0.0000
## as.factor(day)6         1.50000 0.3800323  3.94703  0.0001
## as.factor(day)8         2.30000 0.4695994  4.89779  0.0000
## as.factor(day)12        2.10000 0.5207479  4.03266  0.0001
## treat:as.factor(day)4  -0.47692 0.4312304 -1.10596  0.2713
## treat:as.factor(day)6   0.11538 0.5054902  0.22826  0.8199
## treat:as.factor(day)8  -1.06923 0.6246255 -1.71179  0.0899
## treat:as.factor(day)12 -0.17692 0.6926594 -0.25543  0.7989
## 
##  Correlation: 
##                        (Intr) treat  as.()4 as.()6 as.()8 a.()12 t:.()4
## treat                  -0.752                                          
## as.factor(day)4        -0.150  0.113                                   
## as.factor(day)6        -0.176  0.132  0.586                            
## as.factor(day)8        -0.217  0.163  0.595  0.728                     
## as.factor(day)12       -0.241  0.181  0.461  0.685  0.790              
## treat:as.factor(day)4   0.113 -0.150 -0.752 -0.441 -0.447 -0.347       
## treat:as.factor(day)6   0.132 -0.176 -0.441 -0.752 -0.547 -0.515  0.586
## treat:as.factor(day)8   0.163 -0.217 -0.447 -0.547 -0.752 -0.594  0.595
## treat:as.factor(day)12  0.181 -0.241 -0.347 -0.515 -0.594 -0.752  0.461
##                        t:.()6 t:.()8
## treat                               
## as.factor(day)4                     
## as.factor(day)6                     
## as.factor(day)8                     
## as.factor(day)12                    
## treat:as.factor(day)4               
## treat:as.factor(day)6               
## treat:as.factor(day)8   0.728       
## treat:as.factor(day)12  0.685  0.790
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -2.04598073 -0.68686496  0.06744991  0.67337498  1.66601288 
## 
## Residual standard error: 3.421342 
## Degrees of freedom: 115 total; 105 residual
AIC(ar.4)
## [1] 399.9912
#What does it look like?
t.0<-1
t.1<-0.70405177
t.2<-0.70405177^2 + 0.35048067
t.3<- 0.70405177^3 + 0.35048067^2 -0.12343874
t.4<- 0.70405177^4 + 0.35048067^3 +(-0.12343874)^2 + 0.03547153

r1 <-c(t.0,t.1,t.2,t.3,t.4)
r2 <-c(t.1,t.0,t.1,t.2,t.3)
r3 <-c(t.2,t.1,t.0,t.1,t.2)
r4 <-c(t.3,t.2,t.1,t.0,t.1)
r5 <-c(t.4,t.3,t.2,t.1,t.0)

toep.cor<-rbind(r1,r2,r3,r4,r5); toep.cor
##         [,1]      [,2]      [,3]      [,4]      [,5]
## r1 1.0000000 0.7040518 0.8461696 0.3483886 0.3394680
## r2 0.7040518 1.0000000 0.7040518 0.8461696 0.3483886
## r3 0.8461696 0.7040518 1.0000000 0.7040518 0.8461696
## r4 0.3483886 0.8461696 0.7040518 1.0000000 0.7040518
## r5 0.3394680 0.3483886 0.8461696 0.7040518 1.0000000
###RESIDUAL ANALYSIS###
#create transformed residuals
data$resid<-ar.1$resid
data$yhat<-fitted(ar.1)
alt.Sigma<-(unstructured$sigma^2)*as.matrix(unstructured$modelStruct$corStruct)$'2' #strategy for unstructured
Sigma<-(ar.1$sigma^2)*ar1.cor #strategy for first-order autoregressive
tL<-chol(alt.Sigma)
L<-t(tL)
L%*%tL
##           [,1]     [,2]     [,3]     [,4]      [,5]
## [1,] 11.381607 10.69562 10.43658 10.31058  9.921669
## [2,] 10.695619 11.38161 10.90379 10.93331 10.405237
## [3,] 10.436577 10.90379 11.38161 10.92224 10.623756
## [4,] 10.310577 10.93331 10.92224 11.38161 10.910219
## [5,]  9.921669 10.40524 10.62376 10.91022 11.381607
inv.lower<-solve(L)
for(i in data$id){
    data$std[data$id==i]<-inv.lower%*%as.matrix(data$resid[data$id==i])
    data$fit.std[data$id==i]<-inv.lower%*%as.matrix(data$yhat[data$id==i])
}

#plot transformed residuals against transformed fitted values
plot(y=data$std,x=data$fit.std)

plot(y=data$std,x=data$day)

#plot untransformed residuals against fitted values
plot(y=data$resid,x=data$yhat)

plot(y=data$resid,x=data$day)

#semivariogram
#plot(Variogram(ar.1, form=~day|id,resType="normalized"))
#contrast:
#plot(Variogram(ar.1, form=~day|id))

#Mahalanobis distance
data$d<-rep(NA,nrow(data))
data$p<-rep(NA,nrow(data))

for(i in data$id){
    r<-as.vector(data$std[data$id==i])
    data$d[data$id==i]<-t(r)%*%r
    data$p[data$id==i]<-1-pchisq(data$d[i],df=length(r))
}

table(data$id[data$p<.05])
## 
## 12 35 
##  5  5
length(table(data$id[data$p<.05]))/23
## [1] 0.08695652

0.2.5 Linear Mixed Effects Models

#clean up
rm(list=ls())

#packages
library(faraway)
## 
## Attaching package: 'faraway'
## The following object is masked from 'package:lattice':
## 
##     melanoma
## The following objects are masked from 'package:car':
## 
##     logit, vif
library(lme4)
## 
## Attaching package: 'lme4'
## The following object is masked from 'package:nlme':
## 
##     lmList
library(nlme)
library(lattice)

#load data
data(psid)

#View the data
xyplot(log(income+100)~year|sex, psid, type='l', groups=person)

boxplot(log(income+100)~year, psid)

#Create a variable: "centered year"
psid$cyear <- psid$year-78



#Run a mixed-effects model with random intercept only
#Syntax for "lme4" library
income.mod.int <- lmer(log(income)~cyear*sex+age+educ+(1|person), data=psid)
summary(income.mod.int)
## Linear mixed model fit by REML ['lmerMod']
## Formula: log(income) ~ cyear * sex + age + educ + (1 | person)
##    Data: psid
## 
## REML criterion at convergence: 3964.7
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -9.5182 -0.2257  0.1282  0.4563  2.5309 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  person   (Intercept) 0.2893   0.5379  
##  Residual             0.5530   0.7436  
## Number of obs: 1661, groups:  person, 85
## 
## Fixed effects:
##              Estimate Std. Error t value
## (Intercept)  6.753055   0.558674  12.088
## cyear        0.080155   0.004464  17.955
## sexM         1.126062   0.123062   9.150
## age          0.008724   0.013895   0.628
## educ         0.106692   0.022073   4.834
## cyear:sexM  -0.018597   0.005949  -3.126
## 
## Correlation of Fixed Effects:
##            (Intr) cyear  sexM   age    educ  
## cyear      -0.012                            
## sexM       -0.104  0.039                     
## age        -0.874  0.005 -0.025              
## educ       -0.598 -0.001  0.010  0.167       
## cyear:sexM  0.029 -0.750 -0.027 -0.017 -0.018
#Syntax for "lme" library
income.mod.int.2 <- lme(log(income)~cyear*sex+age+educ+cyear, 
                        random=~1|person, data=psid)
summary(income.mod.int.2)
## Linear mixed-effects model fit by REML
##  Data: psid 
##        AIC      BIC    logLik
##   3980.726 4024.019 -1982.363
## 
## Random effects:
##  Formula: ~1 | person
##         (Intercept)  Residual
## StdDev:   0.5378884 0.7436354
## 
## Fixed effects: log(income) ~ cyear * sex + age + educ + cyear 
##                 Value Std.Error   DF   t-value p-value
## (Intercept)  6.753055 0.5586742 1574 12.087644  0.0000
## cyear        0.080155 0.0044641 1574 17.955301  0.0000
## sexM         1.126062 0.1230617   81  9.150389  0.0000
## age          0.008724 0.0138951   81  0.627846  0.5319
## educ         0.106692 0.0220734   81  4.833502  0.0000
## cyear:sexM  -0.018597 0.0059491 1574 -3.126082  0.0018
##  Correlation: 
##            (Intr) cyear  sexM   age    educ  
## cyear      -0.012                            
## sexM       -0.104  0.039                     
## age        -0.874  0.005 -0.025              
## educ       -0.598 -0.001  0.010  0.167       
## cyear:sexM  0.029 -0.750 -0.027 -0.017 -0.018
## 
## Standardized Within-Group Residuals:
##        Min         Q1        Med         Q3        Max 
## -9.5182099 -0.2257278  0.1281839  0.4562720  2.5308800 
## 
## Number of Observations: 1661
## Number of Groups: 85
#Try a fixed effects model
income.mod.fe <- lm(log(income)~cyear*sex+age+educ+cyear+as.factor(person), 
                    data=psid)
summary(income.mod.fe)
## 
## Call:
## lm(formula = log(income) ~ cyear * sex + age + educ + cyear + 
##     as.factor(person), data = psid)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.9895 -0.1673  0.0816  0.3359  2.0169 
## 
## Coefficients: (3 not defined because of singularities)
##                      Estimate Std. Error t value   Pr(>|t|)    
## (Intercept)         10.145410  12.920967   0.785    0.43246    
## cyear                0.079958   0.004483  17.835    < 2e-16 ***
## sexM                 1.145185   1.660946   0.689    0.49062    
## age                 -0.107610   0.409948  -0.262    0.79297    
## educ                 0.115639   0.164272   0.704    0.48157    
## as.factor(person)2  -0.179434   0.326953  -0.549    0.58322    
## as.factor(person)3   0.507549   3.304448   0.154    0.87795    
## as.factor(person)4   0.123132   0.594901   0.207    0.83605    
## as.factor(person)5  -1.221613   0.384218  -3.179    0.00150 ** 
## as.factor(person)6   0.682249   1.334056   0.511    0.60914    
## as.factor(person)7  -0.133064   0.602578  -0.221    0.82526    
## as.factor(person)8  -0.281718   2.964826  -0.095    0.92431    
## as.factor(person)9  -0.354659   0.761349  -0.466    0.64140    
## as.factor(person)10 -0.367622   1.978114  -0.186    0.85259    
## as.factor(person)11  1.110915   3.203989   0.347    0.72884    
## as.factor(person)12  0.477826   2.060144   0.232    0.81662    
## as.factor(person)13  1.187416   3.371969   0.352    0.72478    
## as.factor(person)14 -0.023231   1.098891  -0.021    0.98314    
## as.factor(person)15  0.219048   1.417092   0.155    0.87718    
## as.factor(person)16  0.493176   2.716483   0.182    0.85596    
## as.factor(person)17  1.146691   4.122914   0.278    0.78095    
## as.factor(person)18  0.542061   4.193835   0.129    0.89717    
## as.factor(person)19  0.719695   1.334056   0.539    0.58963    
## as.factor(person)20  1.498543   2.963639   0.506    0.61318    
## as.factor(person)21  1.104357   4.266322   0.259    0.79578    
## as.factor(person)22 -0.702777   0.776235  -0.905    0.36541    
## as.factor(person)23  0.106271   0.453017   0.235    0.81456    
## as.factor(person)24 -0.571459   1.357893  -0.421    0.67393    
## as.factor(person)25 -0.289934   1.820636  -0.159    0.87349    
## as.factor(person)26  0.477732   2.558154   0.187    0.85188    
## as.factor(person)27  0.600111   3.375136   0.178    0.85890    
## as.factor(person)28  0.579976   2.896383   0.200    0.84132    
## as.factor(person)29  0.147055   2.389453   0.062    0.95093    
## as.factor(person)30 -1.213550   1.980442  -0.613    0.54012    
## as.factor(person)31 -1.063418   3.373897  -0.315    0.75266    
## as.factor(person)32  0.080251   0.557841   0.144    0.88563    
## as.factor(person)33  1.033991   4.691802   0.220    0.82560    
## as.factor(person)34 -1.048944   0.219302  -4.783 0.00000189 ***
## as.factor(person)35  0.800893   1.739416   0.460    0.64527    
## as.factor(person)36 -0.233985   0.989035  -0.237    0.81301    
## as.factor(person)37 -0.340023   0.551710  -0.616    0.53778    
## as.factor(person)38  0.664940   1.901703   0.350    0.72664    
## as.factor(person)39 -0.493901   1.759288  -0.281    0.77895    
## as.factor(person)40  0.483949   2.389453   0.203    0.83952    
## as.factor(person)41  0.756165   1.990453   0.380    0.70407    
## as.factor(person)42 -0.350022   0.487242  -0.718    0.47263    
## as.factor(person)43  1.478191   4.601803   0.321    0.74809    
## as.factor(person)44  0.696375   1.744784   0.399    0.68986    
## as.factor(person)45 -1.401859   0.761350  -1.841    0.06577 .  
## as.factor(person)46  1.556486   5.011757   0.311    0.75617    
## as.factor(person)47  0.627801   3.611950   0.174    0.86204    
## as.factor(person)48 -0.224764   0.613448  -0.366    0.71412    
## as.factor(person)49  2.398449   2.963331   0.809    0.41842    
## as.factor(person)50 -0.675815   0.234218  -2.885    0.00396 ** 
## as.factor(person)51 -0.654761   3.783755  -0.173    0.86264    
## as.factor(person)52  0.798271   1.824574   0.438    0.66180    
## as.factor(person)53  0.254131   2.146520   0.118    0.90577    
## as.factor(person)54 -0.095449   4.924972  -0.019    0.98454    
## as.factor(person)55  0.576077   0.347161   1.659    0.09724 .  
## as.factor(person)56  0.283898   0.787021   0.361    0.71835    
## as.factor(person)57 -0.716431   1.641590  -0.436    0.66259    
## as.factor(person)58 -0.146621   0.861726  -0.170    0.86492    
## as.factor(person)59  1.187977   1.739788   0.683    0.49482    
## as.factor(person)60  0.095620   1.334056   0.072    0.94287    
## as.factor(person)61 -0.483417   1.571585  -0.308    0.75843    
## as.factor(person)62 -0.427260   2.388930  -0.179    0.85808    
## as.factor(person)63  2.108865   3.713308   0.568    0.57017    
## as.factor(person)64 -0.307102   0.938117  -0.327    0.74344    
## as.factor(person)65 -0.026107   1.335588  -0.020    0.98441    
## as.factor(person)66  0.298867   2.806711   0.106    0.91521    
## as.factor(person)67 -0.392233   2.869202  -0.137    0.89128    
## as.factor(person)68  1.446472   0.700572   2.065    0.03911 *  
## as.factor(person)69 -0.734596   0.272163  -2.699    0.00703 ** 
## as.factor(person)70  0.223412   0.264186   0.846    0.39787    
## as.factor(person)71  0.177656   0.334595   0.531    0.59552    
## as.factor(person)72  0.132741   0.678126   0.196    0.84483    
## as.factor(person)73  0.359924   0.994736   0.362    0.71753    
## as.factor(person)74  0.327971   0.788095   0.416    0.67735    
## as.factor(person)75  0.022977   0.865110   0.027    0.97881    
## as.factor(person)76  1.110517   2.716483   0.409    0.68274    
## as.factor(person)77  1.766549   3.896585   0.453    0.65035    
## as.factor(person)78  0.402196   0.758975   0.530    0.59624    
## as.factor(person)79  0.018154   3.783334   0.005    0.99617    
## as.factor(person)80 -1.165715   0.336220  -3.467    0.00054 ***
## as.factor(person)81  1.111121   2.965305   0.375    0.70793    
## as.factor(person)82  0.664343   5.245187   0.127    0.89923    
## as.factor(person)83        NA         NA      NA         NA    
## as.factor(person)84        NA         NA      NA         NA    
## as.factor(person)85        NA         NA      NA         NA    
## cyear:sexM          -0.018738   0.005972  -3.138    0.00173 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7437 on 1574 degrees of freedom
## Multiple R-squared:  0.6323, Adjusted R-squared:  0.6122 
## F-statistic: 31.47 on 86 and 1574 DF,  p-value: < 0.00000000000000022
#Run a mixed-effects model with random intercept and random slope of year
#Syntax for "lme4" library
income.mod <- lmer(log(income)~cyear*sex+age+educ+(cyear|person), data=psid)
summary(income.mod)
## Linear mixed model fit by REML ['lmerMod']
## Formula: log(income) ~ cyear * sex + age + educ + (cyear | person)
##    Data: psid
## 
## REML criterion at convergence: 3819.8
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -10.2310  -0.2134   0.0795   0.4147   2.8254 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr
##  person   (Intercept) 0.2817   0.53071      
##           cyear       0.0024   0.04899  0.19
##  Residual             0.4673   0.68357      
## Number of obs: 1661, groups:  person, 85
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  6.67420    0.54332  12.284
## cyear        0.08531    0.00900   9.480
## sexM         1.15031    0.12129   9.484
## age          0.01093    0.01352   0.808
## educ         0.10421    0.02144   4.861
## cyear:sexM  -0.02631    0.01224  -2.150
## 
## Correlation of Fixed Effects:
##            (Intr) cyear  sexM   age    educ  
## cyear       0.020                            
## sexM       -0.104 -0.098                     
## age        -0.874  0.002 -0.026              
## educ       -0.597  0.000  0.008  0.167       
## cyear:sexM -0.003 -0.735  0.156 -0.010 -0.011
#Syntax for "lme" library
income.mod.2 <- lme(log(income)~cyear*sex+age+educ+cyear, 
                    random=~cyear|person, data=psid)
summary(income.mod.2)
## Linear mixed-effects model fit by REML
##  Data: psid 
##        AIC      BIC    logLik
##   3839.776 3893.892 -1909.888
## 
## Random effects:
##  Formula: ~cyear | person
##  Structure: General positive-definite, Log-Cholesky parametrization
##             StdDev     Corr  
## (Intercept) 0.53071321 (Intr)
## cyear       0.04898952 0.187 
## Residual    0.68357323       
## 
## Fixed effects: log(income) ~ cyear * sex + age + educ + cyear 
##                 Value Std.Error   DF   t-value p-value
## (Intercept)  6.674204 0.5433252 1574 12.283995  0.0000
## cyear        0.085312 0.0089996 1574  9.479521  0.0000
## sexM         1.150313 0.1212925   81  9.483790  0.0000
## age          0.010932 0.0135238   81  0.808342  0.4213
## educ         0.104210 0.0214366   81  4.861287  0.0000
## cyear:sexM  -0.026307 0.0122378 1574 -2.149607  0.0317
##  Correlation: 
##            (Intr) cyear  sexM   age    educ  
## cyear       0.020                            
## sexM       -0.104 -0.098                     
## age        -0.874  0.002 -0.026              
## educ       -0.597  0.000  0.008  0.167       
## cyear:sexM -0.003 -0.735  0.156 -0.010 -0.011
## 
## Standardized Within-Group Residuals:
##          Min           Q1          Med           Q3          Max 
## -10.23102885  -0.21344108   0.07945029   0.41471605   2.82543559 
## 
## Number of Observations: 1661
## Number of Groups: 85
###Predicting Random Effects### 
#initialize variables
rand.eff<-matrix(NA,nrow=length(table(psid$person)),ncol=2)

#Create Covariance Matrix of Random Effects
a<-c(0.2816564, 0.53071*0.04899*0.187)
b<-c(0.53071*0.04899*0.187, 0.0024)
G<-rbind(a,b)
sigma.2<-0.4672724

#Add residuals to data set
psid$resid<-residuals(income.mod)

#Predict Random Effects with BLUP
for(i in psid$person){
  z<-cbind(1, psid$cyear[psid$person==i])
  Sigma<-z%*%G%*%t(z)+diag(x=sigma.2, nrow=nrow(z))
  rand.eff[i,]<-G%*%t(z)%*%solve(Sigma)%*%as.matrix(psid$resid[psid$person==i])
}
head(rand.eff)
##              [,1]         [,2]
## [1,]  0.008126650  0.005794171
## [2,] -0.004270293  0.003262456
## [3,]  0.006876111 -0.007389998
## [4,]  0.010418173 -0.001665729
## [5,] -0.022921505 -0.017969627
## [6,]  0.008693688  0.003539400
#create individual predicted coefficient vectors
ind.coefs<-matrix(income.mod@beta,
                  nrow=length(table(psid$person)),
                  ncol=6,byrow=T)+cbind(rand.eff,0,0,0,0)
head(ind.coefs)
##          [,1]       [,2]     [,3]       [,4]      [,5]        [,6]
## [1,] 6.682331 0.09110630 1.150313 0.01093182 0.1042097 -0.02630651
## [2,] 6.669934 0.08857458 1.150313 0.01093182 0.1042097 -0.02630651
## [3,] 6.681081 0.07792213 1.150313 0.01093182 0.1042097 -0.02630651
## [4,] 6.684623 0.08364640 1.150313 0.01093182 0.1042097 -0.02630651
## [5,] 6.651283 0.06734250 1.150313 0.01093182 0.1042097 -0.02630651
## [6,] 6.682898 0.08885153 1.150313 0.01093182 0.1042097 -0.02630651
#predictions accounting for random effects, individual 1
psid$const<-1
psid.no.1<-psid[psid$person==1,c("const","cyear","sex","age","educ")]
psid.no.1$sex<-1
psid.no.1$inter<-psid.no.1$sex*psid.no.1$cyear
predictions.1<-as.matrix(psid.no.1)%*%ind.coefs[1,]

#graph in two forms
plot(y=predictions.1,x=psid.no.1$cyear+1978,
     ylab="Predicted Logged Income",
     xlab="Year",main="Respondent 1",type='l')

plot(y=exp(predictions.1),x=psid.no.1$cyear+1978,
     ylab="Predicted Income",
     xlab="Year",main="Respondent 1",type='l')

###Residual Analyses###
#initialize variables
psid$std<-rep(NA,nrow(psid))
psid$d<-rep(NA,nrow(psid))
psid$p<-rep(NA,nrow(psid))

#Create Standardized Residuals
for(i in psid$person){
z<-cbind(1, psid$cyear[psid$person==i])
Sigma<-z%*%G%*%t(z)+diag(x=sigma.2, nrow=nrow(z))
tL<-chol(Sigma)
inv.lower<-solve(t(tL))
psid$std[psid$person==i]<-inv.lower%*%as.matrix(psid$resid[psid$person==i])
}

#Mahalanobis Distance
for(i in psid$person){
r<-as.vector(psid$std[psid$person==i])
psid$d[psid$person==i]<-t(r)%*%r
psid$p[psid$person==i]<-1-pchisq(psid$d[i],df=length(r))
}

table(psid$person[psid$p<.05])
## 
## 62 64 66 67 69 70 71 73 74 82 83 84 85 
## 15 14 16 12 14 17 12 16 13 23 23 23 22
#Figures
#Histogram & Density Plot of Regular Residuals
hist(psid$resid)

densityplot(psid$resid)

#Histogram & Density Plot of Transformed Residuals
hist(psid$std)

densityplot(psid$std)

#Transformed Residuals Against Fitted Values
mod.fit<-fitted(income.mod)
plot(y=psid$std, x=mod.fit)

#Transformed Residuals Against Time
plot(y=psid$std, x=psid$cyear)

#Semi-Variogram (crafted for "nlme" library)
plot(Variogram(income.mod.2, form=~cyear|person), ylim=c(0,1.2))

##############################################################

#data
data(psid)

#Create a variable: "centered year"
psid$cyear <- psid$year-78

#Run a mixed-effects model with random intercept and random slope of year
#Syntax for "lme4" library
income.mod <- lmer(log(income)~cyear*sex+age+educ+(cyear|person), data=psid)
summary(income.mod)
## Linear mixed model fit by REML ['lmerMod']
## Formula: log(income) ~ cyear * sex + age + educ + (cyear | person)
##    Data: psid
## 
## REML criterion at convergence: 3819.8
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -10.2310  -0.2134   0.0795   0.4147   2.8254 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr
##  person   (Intercept) 0.2817   0.53071      
##           cyear       0.0024   0.04899  0.19
##  Residual             0.4673   0.68357      
## Number of obs: 1661, groups:  person, 85
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  6.67420    0.54332  12.284
## cyear        0.08531    0.00900   9.480
## sexM         1.15031    0.12129   9.484
## age          0.01093    0.01352   0.808
## educ         0.10421    0.02144   4.861
## cyear:sexM  -0.02631    0.01224  -2.150
## 
## Correlation of Fixed Effects:
##            (Intr) cyear  sexM   age    educ  
## cyear       0.020                            
## sexM       -0.104 -0.098                     
## age        -0.874  0.002 -0.026              
## educ       -0.597  0.000  0.008  0.167       
## cyear:sexM -0.003 -0.735  0.156 -0.010 -0.011
#Example of how to create a variable: year observed
for(i in psid$person){
    psid$year.2[psid$person==i]<-(psid$year[psid$person==i][1])
    }

#Create a more interesting variable: age over time
for(i in psid$person){
    psid$age.2[psid$person==i]<-c(1:length(psid$person[psid$person==i]))-1
    }

#Age all jumbled together
psid$age.3<-psid$age+psid$age.2

#Decompose the cross-sectional and longitudinal effects of age
income.mod.1 <- lmer(log(income)~age+age.2+educ+(1|person), data=psid)
summary(income.mod.1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: log(income) ~ age + age.2 + educ + (1 | person)
##    Data: psid
## 
## REML criterion at convergence: 4002.2
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -9.3637 -0.2030  0.1262  0.4591  2.5084 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  person   (Intercept) 0.5452   0.7384  
##  Residual             0.5523   0.7432  
## Number of obs: 1661, groups:  person, 85
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept) 6.684690   0.745378   8.968
## age         0.011047   0.018652   0.592
## age.2       0.073405   0.003081  23.826
## educ        0.103059   0.029608   3.481
## 
## Correlation of Fixed Effects:
##       (Intr) age    age.2 
## age   -0.881              
## age.2 -0.024 -0.008       
## educ  -0.599  0.166 -0.017
#Jumble everything together
income.mod.2 <- lmer(log(income)~age.3+educ+(1|person), data=psid)
summary(income.mod.2)
## Linear mixed model fit by REML ['lmerMod']
## Formula: log(income) ~ age.3 + educ + (1 | person)
##    Data: psid
## 
## REML criterion at convergence: 4006.5
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -9.3332 -0.2057  0.1272  0.4548  2.4957 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  person   (Intercept) 0.6115   0.7820  
##  Residual             0.5524   0.7432  
## Number of obs: 1661, groups:  person, 85
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept) 4.557267   0.393758  11.574
## age.3       0.071814   0.003041  23.614
## educ        0.119273   0.030830   3.869
## 
## Correlation of Fixed Effects:
##       (Intr) age.3 
## age.3 -0.330       
## educ  -0.921  0.011
#We're probably picking-up the effect of year here.

##############################################################

0.2.6 Time-Series Cross-Section Models

library(plm)
## Loading required package: Formula
## 
## Attaching package: 'plm'
## The following object is masked from 'package:timeSeries':
## 
##     lag
library(lmtest)
library(pcse)
## 
## Attaching package: 'pcse'
## The following object is masked from 'package:sandwich':
## 
##     vcovPC
library(nlme)
library(foreign)

#load data # Simulated Panel of 50 States Over 10 Years
#state <-read.dta("http://spia.uga.edu/faculty_pages/monogan/teaching/pd/STATE2.DTA",
#                 convert.factors=FALSE)
state <-read.dta("STATE2.DTA",convert.factors=FALSE)
### UNIT EFFECTS ###
#Is the mean of y the same across units?
anova.mod<-aov(y~state,data=state)
summary(anova.mod)
##              Df  Sum Sq Mean Sq F value Pr(>F)
## state         1     210     210   0.065  0.799
## Residuals   498 1619256    3252
#With an OLS model, is the mean of the residuals the same across units?
main.model <- lm(y~x, data=state); summary(main.model)
## 
## Call:
## lm(formula = y ~ x, data = state)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -26.99 -11.93  -0.25  12.94  24.73 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 50.76900    0.63107   80.45   <2e-16 ***
## x            1.96500    0.02249   87.38   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.11 on 498 degrees of freedom
## Multiple R-squared:  0.9388, Adjusted R-squared:  0.9386 
## F-statistic:  7635 on 1 and 498 DF,  p-value: < 0.00000000000000022
anova.resids <- aov(main.model$residuals~state$state)
summary(anova.resids)
##              Df Sum Sq Mean Sq F value Pr(>F)
## state$state   1      3    2.75   0.014  0.907
## Residuals   498  99161  199.12
#fixed effects model
fe.mod <- plm(y~x, data=state,index=c("state","time"),model="within")

summary(fe.mod)
## Oneway (individual) effect Within Model
## 
## Call:
## plm(formula = y ~ x, data = state, model = "within", index = c("state", 
##     "time"))
## 
## Balanced Panel: n = 50, T = 10, N = 500
## 
## Residuals:
##     Min.  1st Qu.   Median  3rd Qu.     Max. 
## -31.5999 -10.4457  -0.8056  10.7443  31.9038 
## 
## Coefficients:
##   Estimate Std. Error t-value              Pr(>|t|)    
## x 1.969086   0.023423  84.066 < 0.00000000000000022 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    1478100
## Residual Sum of Squares: 88300
## R-Squared:      0.94026
## Adj. R-Squared: 0.93361
## F-statistic: 7067.11 on 1 and 449 DF, p-value: < 0.000000000000000222
#random effects model
re.mod <- plm(y~x, data=state,index=c("state","time"),model="random")

summary(re.mod)
## Oneway (individual) effect Random Effect Model 
##    (Swamy-Arora's transformation)
## 
## Call:
## plm(formula = y ~ x, data = state, model = "random", index = c("state", 
##     "time"))
## 
## Balanced Panel: n = 50, T = 10, N = 500
## 
## Effects:
##                   var std.dev share
## idiosyncratic 196.660  14.024 0.986
## individual      2.827   1.681 0.014
## theta: 0.06495
## 
## Residuals:
##      Min.   1st Qu.    Median   3rd Qu.      Max. 
## -27.26881 -11.79069  -0.37252  12.90005  24.77429 
## 
## Coefficients:
##             Estimate Std. Error t-value              Pr(>|t|)    
## (Intercept) 50.76900    0.67025  75.747 < 0.00000000000000022 ***
## x            1.96547    0.02246  87.510 < 0.00000000000000022 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    1601700
## Residual Sum of Squares: 97799
## R-Squared:      0.93894
## Adj. R-Squared: 0.93882
## F-statistic: 7657.97 on 1 and 498 DF, p-value: < 0.000000000000000222
#Is random effects a sufficient specification?
phtest(fe.mod, re.mod)
## 
##  Hausman Test
## 
## data:  y ~ x
## chisq = 0.29526, df = 1, p-value = 0.5869
## alternative hypothesis: one model is inconsistent
### SERIAL CORRELATION AND HETEROSCEDASTICITY ###
#Are residuals autocorrelated?
pbgtest(fe.mod)
## 
##  Breusch-Godfrey/Wooldridge test for serial correlation in panel
##  models
## 
## data:  y ~ x
## chisq = 60.81, df = 10, p-value = 0.000000002546
## alternative hypothesis: serial correlation in idiosyncratic errors
pbgtest(re.mod)
## 
##  Breusch-Godfrey/Wooldridge test for serial correlation in panel
##  models
## 
## data:  y ~ x
## chisq = 5.7469, df = 10, p-value = 0.8361
## alternative hypothesis: serial correlation in idiosyncratic errors
#Is there heteroscedasticity across units?
by(data=fe.mod$residuals, INDICES=state$state,FUN=sd)
## state$state: 1
## [1] 11.81807
## -------------------------------------------------------- 
## state$state: 2
## [1] 13.89172
## -------------------------------------------------------- 
## state$state: 3
## [1] 15.4049
## -------------------------------------------------------- 
## state$state: 4
## [1] 12.51429
## -------------------------------------------------------- 
## state$state: 5
## [1] 17.57407
## -------------------------------------------------------- 
## state$state: 6
## [1] 11.97787
## -------------------------------------------------------- 
## state$state: 7
## [1] 16.49319
## -------------------------------------------------------- 
## state$state: 8
## [1] 10.86398
## -------------------------------------------------------- 
## state$state: 9
## [1] 14.66044
## -------------------------------------------------------- 
## state$state: 10
## [1] 15.02289
## -------------------------------------------------------- 
## state$state: 11
## [1] 15.12577
## -------------------------------------------------------- 
## state$state: 12
## [1] 16.64692
## -------------------------------------------------------- 
## state$state: 13
## [1] 16.59279
## -------------------------------------------------------- 
## state$state: 14
## [1] 12.14405
## -------------------------------------------------------- 
## state$state: 15
## [1] 14.34336
## -------------------------------------------------------- 
## state$state: 16
## [1] 16.13807
## -------------------------------------------------------- 
## state$state: 17
## [1] 10.88982
## -------------------------------------------------------- 
## state$state: 18
## [1] 16.20893
## -------------------------------------------------------- 
## state$state: 19
## [1] 11.50404
## -------------------------------------------------------- 
## state$state: 20
## [1] 16.14991
## -------------------------------------------------------- 
## state$state: 21
## [1] 11.24411
## -------------------------------------------------------- 
## state$state: 22
## [1] 14.53759
## -------------------------------------------------------- 
## state$state: 23
## [1] 16.11378
## -------------------------------------------------------- 
## state$state: 24
## [1] 14.48975
## -------------------------------------------------------- 
## state$state: 25
## [1] 11.10953
## -------------------------------------------------------- 
## state$state: 26
## [1] 12.7745
## -------------------------------------------------------- 
## state$state: 27
## [1] 14.9713
## -------------------------------------------------------- 
## state$state: 28
## [1] 16.78286
## -------------------------------------------------------- 
## state$state: 29
## [1] 10.24506
## -------------------------------------------------------- 
## state$state: 30
## [1] 10.06918
## -------------------------------------------------------- 
## state$state: 31
## [1] 13.33506
## -------------------------------------------------------- 
## state$state: 32
## [1] 14.03133
## -------------------------------------------------------- 
## state$state: 33
## [1] 12.48712
## -------------------------------------------------------- 
## state$state: 34
## [1] 13.17779
## -------------------------------------------------------- 
## state$state: 35
## [1] 9.262702
## -------------------------------------------------------- 
## state$state: 36
## [1] 19.28064
## -------------------------------------------------------- 
## state$state: 37
## [1] 13.92287
## -------------------------------------------------------- 
## state$state: 38
## [1] 11.73758
## -------------------------------------------------------- 
## state$state: 39
## [1] 16.87471
## -------------------------------------------------------- 
## state$state: 40
## [1] 10.13145
## -------------------------------------------------------- 
## state$state: 41
## [1] 15.64463
## -------------------------------------------------------- 
## state$state: 42
## [1] 9.173528
## -------------------------------------------------------- 
## state$state: 43
## [1] 14.43694
## -------------------------------------------------------- 
## state$state: 44
## [1] 13.77322
## -------------------------------------------------------- 
## state$state: 45
## [1] 12.89994
## -------------------------------------------------------- 
## state$state: 46
## [1] 14.90767
## -------------------------------------------------------- 
## state$state: 47
## [1] 13.03579
## -------------------------------------------------------- 
## state$state: 48
## [1] 16.4042
## -------------------------------------------------------- 
## state$state: 49
## [1] 14.40868
## -------------------------------------------------------- 
## state$state: 50
## [1] 13.5256
by(data=re.mod$residuals, INDICES=state$state,FUN=sd)
## state$state: 1
## [1] 11.82263
## -------------------------------------------------------- 
## state$state: 2
## [1] 13.88126
## -------------------------------------------------------- 
## state$state: 3
## [1] 15.35098
## -------------------------------------------------------- 
## state$state: 4
## [1] 12.5062
## -------------------------------------------------------- 
## state$state: 5
## [1] 17.6232
## -------------------------------------------------------- 
## state$state: 6
## [1] 11.9775
## -------------------------------------------------------- 
## state$state: 7
## [1] 16.53566
## -------------------------------------------------------- 
## state$state: 8
## [1] 10.83712
## -------------------------------------------------------- 
## state$state: 9
## [1] 14.71178
## -------------------------------------------------------- 
## state$state: 10
## [1] 15.01937
## -------------------------------------------------------- 
## state$state: 11
## [1] 15.13247
## -------------------------------------------------------- 
## state$state: 12
## [1] 16.67284
## -------------------------------------------------------- 
## state$state: 13
## [1] 16.59559
## -------------------------------------------------------- 
## state$state: 14
## [1] 12.19081
## -------------------------------------------------------- 
## state$state: 15
## [1] 14.30411
## -------------------------------------------------------- 
## state$state: 16
## [1] 16.15757
## -------------------------------------------------------- 
## state$state: 17
## [1] 10.91339
## -------------------------------------------------------- 
## state$state: 18
## [1] 16.2043
## -------------------------------------------------------- 
## state$state: 19
## [1] 11.5274
## -------------------------------------------------------- 
## state$state: 20
## [1] 16.13781
## -------------------------------------------------------- 
## state$state: 21
## [1] 11.23782
## -------------------------------------------------------- 
## state$state: 22
## [1] 14.48624
## -------------------------------------------------------- 
## state$state: 23
## [1] 16.15662
## -------------------------------------------------------- 
## state$state: 24
## [1] 14.43354
## -------------------------------------------------------- 
## state$state: 25
## [1] 11.08485
## -------------------------------------------------------- 
## state$state: 26
## [1] 12.75815
## -------------------------------------------------------- 
## state$state: 27
## [1] 14.95834
## -------------------------------------------------------- 
## state$state: 28
## [1] 16.82505
## -------------------------------------------------------- 
## state$state: 29
## [1] 10.21616
## -------------------------------------------------------- 
## state$state: 30
## [1] 10.04843
## -------------------------------------------------------- 
## state$state: 31
## [1] 13.31257
## -------------------------------------------------------- 
## state$state: 32
## [1] 14.01391
## -------------------------------------------------------- 
## state$state: 33
## [1] 12.52335
## -------------------------------------------------------- 
## state$state: 34
## [1] 13.16966
## -------------------------------------------------------- 
## state$state: 35
## [1] 9.204659
## -------------------------------------------------------- 
## state$state: 36
## [1] 19.26064
## -------------------------------------------------------- 
## state$state: 37
## [1] 13.88357
## -------------------------------------------------------- 
## state$state: 38
## [1] 11.78806
## -------------------------------------------------------- 
## state$state: 39
## [1] 16.83903
## -------------------------------------------------------- 
## state$state: 40
## [1] 10.15442
## -------------------------------------------------------- 
## state$state: 41
## [1] 15.64971
## -------------------------------------------------------- 
## state$state: 42
## [1] 9.129661
## -------------------------------------------------------- 
## state$state: 43
## [1] 14.4415
## -------------------------------------------------------- 
## state$state: 44
## [1] 13.75224
## -------------------------------------------------------- 
## state$state: 45
## [1] 12.93517
## -------------------------------------------------------- 
## state$state: 46
## [1] 14.88587
## -------------------------------------------------------- 
## state$state: 47
## [1] 13.05417
## -------------------------------------------------------- 
## state$state: 48
## [1] 16.42224
## -------------------------------------------------------- 
## state$state: 49
## [1] 14.43068
## -------------------------------------------------------- 
## state$state: 50
## [1] 13.56733
#Running a random effects model that specifies first-order serial correlation
mod.lme <- lme(y ~ x, data = state, random = ~1 | state, 
               correlation = corAR1(0, form = ~time | state))

summary(mod.lme)
## Linear mixed-effects model fit by REML
##  Data: state 
##        AIC      BIC    logLik
##   4078.056 4099.109 -2034.028
## 
## Random effects:
##  Formula: ~1 | state
##         (Intercept) Residual
## StdDev:    1.930501 13.97998
## 
## Correlation Structure: AR(1)
##  Formula: ~time | state 
##  Parameter estimate(s):
##         Phi 
## -0.03048009 
## Fixed effects: y ~ x 
##                Value Std.Error  DF  t-value p-value
## (Intercept) 50.77004 0.6666945 449 76.15188       0
## x            1.96514 0.0224076 449 87.69965       0
##  Correlation: 
##   (Intr)
## x 0     
## 
## Standardized Within-Group Residuals:
##         Min          Q1         Med          Q3         Max 
## -1.98437665 -0.82812706 -0.04295284  0.92107232  1.78051783 
## 
## Number of Observations: 500
## Number of Groups: 50
#Running a fixed effects model that accounts for first-order serial correlation
fe.gls.model <- gls(y~x+as.factor(state), 
                    correlation=corAR1(0, form=~time|state), 
                    data=state)

summary(fe.gls.model)
## Generalized least squares fit by REML
##   Model: y ~ x + as.factor(state) 
##   Data: state 
##      AIC      BIC   logLik
##   3879.1 4096.772 -1886.55
## 
## Correlation Structure: AR(1)
##  Formula: ~time | state 
##  Parameter estimate(s):
##         Phi 
## -0.03326118 
## 
## Coefficients:
##                       Value Std.Error  t-value p-value
## (Intercept)        38.51014  4.303892  8.94775  0.0000
## x                   1.96878  0.023354 84.30015  0.0000
## as.factor(state)2  15.21545  6.087978  2.49926  0.0128
## as.factor(state)3   6.10203  6.082887  1.00315  0.3163
## as.factor(state)4  12.39929  6.068969  2.04306  0.0416
## as.factor(state)5   9.12632  6.082922  1.50032  0.1342
## as.factor(state)6  12.93973  6.071881  2.13109  0.0336
## as.factor(state)7  16.78769  6.097093  2.75339  0.0061
## as.factor(state)8  18.67235  6.110262  3.05590  0.0024
## as.factor(state)9   8.04805  6.067793  1.32636  0.1854
## as.factor(state)10 11.31959  6.069784  1.86491  0.0628
## as.factor(state)11 14.30927  6.082237  2.35263  0.0191
## as.factor(state)12 15.77289  6.072210  2.59755  0.0097
## as.factor(state)13  9.53445  6.077853  1.56872  0.1174
## as.factor(state)14 10.23571  6.067146  1.68707  0.0923
## as.factor(state)15 20.36892  6.075082  3.35286  0.0009
## as.factor(state)16 15.80246  6.077278  2.60025  0.0096
## as.factor(state)17 18.18053  6.087073  2.98674  0.0030
## as.factor(state)18 12.11560  6.095337  1.98768  0.0475
## as.factor(state)19 14.28269  6.085891  2.34685  0.0194
## as.factor(state)20 11.02179  6.066479  1.81684  0.0699
## as.factor(state)21 13.82819  6.066234  2.27953  0.0231
## as.factor(state)22 15.94378  6.067758  2.62762  0.0089
## as.factor(state)23  8.41616  6.099328  1.37985  0.1683
## as.factor(state)24  5.99225  6.123529  0.97856  0.3283
## as.factor(state)25 12.83281  6.086141  2.10853  0.0355
## as.factor(state)26 14.58066  6.082256  2.39725  0.0169
## as.factor(state)27  7.82404  6.066380  1.28974  0.1978
## as.factor(state)28 17.02825  6.070942  2.80488  0.0053
## as.factor(state)29 11.16618  6.076695  1.83754  0.0668
## as.factor(state)30 14.09157  6.067493  2.32247  0.0207
## as.factor(state)31 17.44543  6.066947  2.87549  0.0042
## as.factor(state)32 15.10688  6.075537  2.48651  0.0133
## as.factor(state)33 10.82480  6.094083  1.77628  0.0764
## as.factor(state)34  5.66005  6.074270  0.93181  0.3519
## as.factor(state)35 16.98846  6.073798  2.79701  0.0054
## as.factor(state)36 11.23724  6.082011  1.84762  0.0653
## as.factor(state)37 10.71316  6.102103  1.75565  0.0798
## as.factor(state)38  6.39052  6.070020  1.05280  0.2930
## as.factor(state)39 13.34745  6.069346  2.19916  0.0284
## as.factor(state)40 21.82623  6.081142  3.58917  0.0004
## as.factor(state)41 19.56149  6.069372  3.22298  0.0014
## as.factor(state)42 15.34824  6.078588  2.52497  0.0119
## as.factor(state)43  4.43297  6.070927  0.73020  0.4657
## as.factor(state)44  8.72527  6.081059  1.43483  0.1520
## as.factor(state)45  3.83766  6.075919  0.63162  0.5280
## as.factor(state)46  6.90064  6.068853  1.13706  0.2561
## as.factor(state)47 11.07919  6.105142  1.81473  0.0702
## as.factor(state)48 15.43003  6.082025  2.53699  0.0115
## as.factor(state)49  7.11239  6.086309  1.16859  0.2432
## as.factor(state)50 17.09450  6.072454  2.81509  0.0051
## 
##  Correlation: 
##                    (Intr) x      as.()2 as.()3 as.()4 as.()5 as.()6 as.()7
## x                  -0.082                                                 
## as.factor(state)2  -0.709  0.085                                          
## as.factor(state)3  -0.709  0.074  0.503                                   
## as.factor(state)4  -0.707  0.030  0.501  0.501                            
## as.factor(state)5  -0.709  0.074  0.503  0.503  0.501                     
## as.factor(state)6  -0.708  0.043  0.501  0.501  0.501  0.501              
## as.factor(state)7  -0.709  0.101  0.504  0.504  0.500  0.504  0.501       
## as.factor(state)8  -0.709  0.120  0.505  0.504  0.500  0.504  0.501  0.506
## as.factor(state)9  -0.706  0.023  0.500  0.500  0.500  0.500  0.500  0.500
## as.factor(state)10 -0.707  0.034  0.501  0.501  0.501  0.501  0.501  0.501
## as.factor(state)11 -0.709  0.073  0.503  0.503  0.501  0.503  0.501  0.503
## as.factor(state)12 -0.708  0.045  0.501  0.501  0.501  0.501  0.501  0.501
## as.factor(state)13 -0.708  0.062  0.502  0.502  0.501  0.502  0.501  0.503
## as.factor(state)14 -0.706  0.018  0.500  0.500  0.500  0.500  0.500  0.499
## as.factor(state)15 -0.708  0.054  0.502  0.502  0.501  0.502  0.501  0.502
## as.factor(state)16 -0.708  0.060  0.502  0.502  0.501  0.502  0.501  0.503
## as.factor(state)17 -0.709  0.083  0.503  0.503  0.501  0.503  0.501  0.504
## as.factor(state)18 -0.709  0.098  0.504  0.503  0.500  0.503  0.501  0.505
## as.factor(state)19 -0.709  0.080  0.503  0.503  0.501  0.503  0.501  0.504
## as.factor(state)20 -0.706  0.010  0.499  0.499  0.500  0.499  0.500  0.498
## as.factor(state)21 -0.704 -0.004  0.498  0.498  0.500  0.498  0.499  0.497
## as.factor(state)22 -0.706  0.023  0.500  0.500  0.500  0.500  0.500  0.500
## as.factor(state)23 -0.709  0.104  0.504  0.504  0.500  0.504  0.501  0.505
## as.factor(state)24 -0.709  0.137  0.505  0.504  0.499  0.504  0.501  0.507
## as.factor(state)25 -0.709  0.081  0.503  0.503  0.501  0.503  0.501  0.504
## as.factor(state)26 -0.709  0.073  0.503  0.503  0.501  0.503  0.501  0.503
## as.factor(state)27 -0.705  0.008  0.499  0.499  0.500  0.499  0.500  0.498
## as.factor(state)28 -0.707  0.040  0.501  0.501  0.501  0.501  0.501  0.501
## as.factor(state)29 -0.708  0.059  0.502  0.502  0.501  0.502  0.501  0.503
## as.factor(state)30 -0.706  0.021  0.500  0.500  0.500  0.500  0.500  0.499
## as.factor(state)31 -0.706  0.016  0.499  0.500  0.500  0.500  0.500  0.499
## as.factor(state)32 -0.708  0.055  0.502  0.502  0.501  0.502  0.501  0.502
## as.factor(state)33 -0.709  0.096  0.504  0.503  0.500  0.503  0.501  0.505
## as.factor(state)34 -0.708  0.052  0.502  0.502  0.501  0.502  0.501  0.502
## as.factor(state)35 -0.708  0.050  0.502  0.502  0.501  0.502  0.501  0.502
## as.factor(state)36 -0.709  0.072  0.503  0.503  0.501  0.503  0.501  0.503
## as.factor(state)37 -0.709  0.108  0.504  0.504  0.500  0.504  0.501  0.505
## as.factor(state)38 -0.707  0.036  0.501  0.501  0.501  0.501  0.501  0.501
## as.factor(state)39 -0.707  0.032  0.501  0.501  0.500  0.501  0.501  0.500
## as.factor(state)40 -0.709  0.070  0.503  0.503  0.501  0.503  0.501  0.503
## as.factor(state)41 -0.707  0.032  0.501  0.501  0.500  0.501  0.501  0.500
## as.factor(state)42 -0.709  0.064  0.503  0.502  0.501  0.502  0.501  0.503
## as.factor(state)43 -0.707  0.040  0.501  0.501  0.501  0.501  0.501  0.501
## as.factor(state)44 -0.709  0.070  0.503  0.503  0.501  0.503  0.501  0.503
## as.factor(state)45 -0.708  0.057  0.502  0.502  0.501  0.502  0.501  0.502
## as.factor(state)46 -0.707  0.030  0.500  0.501  0.500  0.501  0.501  0.500
## as.factor(state)47 -0.709  0.113  0.505  0.504  0.500  0.504  0.501  0.506
## as.factor(state)48 -0.709  0.072  0.503  0.503  0.501  0.503  0.501  0.503
## as.factor(state)49 -0.709  0.081  0.503  0.503  0.501  0.503  0.501  0.504
## as.factor(state)50 -0.708  0.045  0.502  0.501  0.501  0.501  0.501  0.502
##                    as.()8 as.()9 a.()10 a.()11 a.()12 a.()13 a.()14 a.()15
## x                                                                         
## as.factor(state)2                                                         
## as.factor(state)3                                                         
## as.factor(state)4                                                         
## as.factor(state)5                                                         
## as.factor(state)6                                                         
## as.factor(state)7                                                         
## as.factor(state)8                                                         
## as.factor(state)9   0.499                                                 
## as.factor(state)10  0.500  0.500                                          
## as.factor(state)11  0.504  0.500  0.501                                   
## as.factor(state)12  0.501  0.500  0.501  0.501                            
## as.factor(state)13  0.503  0.500  0.501  0.502  0.501                     
## as.factor(state)14  0.498  0.500  0.500  0.500  0.500  0.500              
## as.factor(state)15  0.502  0.500  0.501  0.502  0.501  0.502  0.500       
## as.factor(state)16  0.503  0.500  0.501  0.502  0.501  0.502  0.500  0.502
## as.factor(state)17  0.505  0.500  0.501  0.503  0.501  0.502  0.500  0.502
## as.factor(state)18  0.506  0.500  0.501  0.503  0.501  0.503  0.499  0.502
## as.factor(state)19  0.504  0.500  0.501  0.503  0.501  0.502  0.500  0.502
## as.factor(state)20  0.498  0.500  0.500  0.499  0.500  0.500  0.500  0.500
## as.factor(state)21  0.496  0.500  0.500  0.498  0.499  0.499  0.500  0.499
## as.factor(state)22  0.499  0.500  0.500  0.500  0.500  0.500  0.500  0.500
## as.factor(state)23  0.506  0.500  0.501  0.504  0.501  0.503  0.499  0.502
## as.factor(state)24  0.508  0.498  0.500  0.504  0.501  0.503  0.498  0.502
## as.factor(state)25  0.504  0.500  0.501  0.503  0.501  0.502  0.500  0.502
## as.factor(state)26  0.504  0.500  0.501  0.503  0.501  0.502  0.500  0.502
## as.factor(state)27  0.497  0.500  0.500  0.499  0.500  0.500  0.500  0.500
## as.factor(state)28  0.501  0.500  0.501  0.501  0.501  0.501  0.500  0.501
## as.factor(state)29  0.503  0.500  0.501  0.502  0.501  0.502  0.500  0.502
## as.factor(state)30  0.499  0.500  0.500  0.500  0.500  0.500  0.500  0.500
## as.factor(state)31  0.498  0.500  0.500  0.500  0.500  0.500  0.500  0.500
## as.factor(state)32  0.502  0.500  0.501  0.502  0.501  0.502  0.500  0.502
## as.factor(state)33  0.506  0.500  0.501  0.503  0.501  0.503  0.499  0.502
## as.factor(state)34  0.502  0.500  0.501  0.502  0.501  0.502  0.500  0.501
## as.factor(state)35  0.502  0.500  0.501  0.502  0.501  0.502  0.500  0.501
## as.factor(state)36  0.504  0.500  0.501  0.503  0.501  0.502  0.500  0.502
## as.factor(state)37  0.506  0.499  0.500  0.504  0.501  0.503  0.499  0.502
## as.factor(state)38  0.500  0.500  0.501  0.501  0.501  0.501  0.500  0.501
## as.factor(state)39  0.500  0.500  0.501  0.501  0.501  0.501  0.500  0.501
## as.factor(state)40  0.504  0.500  0.501  0.503  0.501  0.502  0.500  0.502
## as.factor(state)41  0.500  0.500  0.501  0.501  0.501  0.501  0.500  0.501
## as.factor(state)42  0.503  0.500  0.501  0.502  0.501  0.502  0.500  0.502
## as.factor(state)43  0.501  0.500  0.501  0.501  0.501  0.501  0.500  0.501
## as.factor(state)44  0.504  0.500  0.501  0.503  0.501  0.502  0.500  0.502
## as.factor(state)45  0.502  0.500  0.501  0.502  0.501  0.502  0.500  0.502
## as.factor(state)46  0.500  0.500  0.501  0.501  0.501  0.501  0.500  0.501
## as.factor(state)47  0.507  0.499  0.500  0.504  0.501  0.503  0.499  0.502
## as.factor(state)48  0.504  0.500  0.501  0.503  0.501  0.502  0.500  0.502
## as.factor(state)49  0.504  0.500  0.501  0.503  0.501  0.502  0.500  0.502
## as.factor(state)50  0.501  0.500  0.501  0.501  0.501  0.501  0.500  0.501
##                    a.()16 a.()17 a.()18 a.()19 a.()20 a.()21 a.()22 a.()23
## x                                                                         
## as.factor(state)2                                                         
## as.factor(state)3                                                         
## as.factor(state)4                                                         
## as.factor(state)5                                                         
## as.factor(state)6                                                         
## as.factor(state)7                                                         
## as.factor(state)8                                                         
## as.factor(state)9                                                         
## as.factor(state)10                                                        
## as.factor(state)11                                                        
## as.factor(state)12                                                        
## as.factor(state)13                                                        
## as.factor(state)14                                                        
## as.factor(state)15                                                        
## as.factor(state)16                                                        
## as.factor(state)17  0.502                                                 
## as.factor(state)18  0.503  0.504                                          
## as.factor(state)19  0.502  0.503  0.504                                   
## as.factor(state)20  0.500  0.499  0.499  0.499                            
## as.factor(state)21  0.499  0.498  0.497  0.498  0.500                     
## as.factor(state)22  0.500  0.500  0.500  0.500  0.500  0.500              
## as.factor(state)23  0.503  0.504  0.505  0.504  0.498  0.497  0.500       
## as.factor(state)24  0.503  0.505  0.506  0.505  0.497  0.495  0.498  0.507
## as.factor(state)25  0.502  0.503  0.504  0.503  0.499  0.498  0.500  0.504
## as.factor(state)26  0.502  0.503  0.503  0.503  0.499  0.498  0.500  0.504
## as.factor(state)27  0.500  0.499  0.498  0.499  0.500  0.500  0.500  0.498
## as.factor(state)28  0.501  0.501  0.501  0.501  0.500  0.499  0.500  0.501
## as.factor(state)29  0.502  0.502  0.502  0.502  0.500  0.499  0.500  0.503
## as.factor(state)30  0.500  0.500  0.500  0.500  0.500  0.500  0.500  0.499
## as.factor(state)31  0.500  0.500  0.499  0.500  0.500  0.500  0.500  0.499
## as.factor(state)32  0.502  0.502  0.502  0.502  0.500  0.499  0.500  0.502
## as.factor(state)33  0.503  0.504  0.505  0.504  0.499  0.497  0.500  0.505
## as.factor(state)34  0.502  0.502  0.502  0.502  0.500  0.499  0.500  0.502
## as.factor(state)35  0.501  0.502  0.502  0.502  0.500  0.499  0.500  0.502
## as.factor(state)36  0.502  0.503  0.503  0.503  0.499  0.498  0.500  0.503
## as.factor(state)37  0.503  0.504  0.505  0.504  0.498  0.497  0.499  0.506
## as.factor(state)38  0.501  0.501  0.501  0.501  0.500  0.500  0.500  0.501
## as.factor(state)39  0.501  0.501  0.501  0.501  0.500  0.500  0.500  0.500
## as.factor(state)40  0.502  0.503  0.503  0.503  0.499  0.498  0.500  0.503
## as.factor(state)41  0.501  0.501  0.501  0.501  0.500  0.500  0.500  0.500
## as.factor(state)42  0.502  0.503  0.503  0.502  0.500  0.499  0.500  0.503
## as.factor(state)43  0.501  0.501  0.501  0.501  0.500  0.499  0.500  0.501
## as.factor(state)44  0.502  0.503  0.503  0.503  0.499  0.498  0.500  0.503
## as.factor(state)45  0.502  0.502  0.502  0.502  0.500  0.499  0.500  0.502
## as.factor(state)46  0.501  0.501  0.500  0.501  0.500  0.500  0.500  0.500
## as.factor(state)47  0.503  0.504  0.505  0.504  0.498  0.496  0.499  0.506
## as.factor(state)48  0.502  0.503  0.503  0.503  0.499  0.498  0.500  0.503
## as.factor(state)49  0.502  0.503  0.504  0.503  0.499  0.498  0.500  0.504
## as.factor(state)50  0.501  0.502  0.502  0.502  0.500  0.499  0.500  0.501
##                    a.()24 a.()25 a.()26 a.()27 a.()28 a.()29 a.()30 a.()31
## x                                                                         
## as.factor(state)2                                                         
## as.factor(state)3                                                         
## as.factor(state)4                                                         
## as.factor(state)5                                                         
## as.factor(state)6                                                         
## as.factor(state)7                                                         
## as.factor(state)8                                                         
## as.factor(state)9                                                         
## as.factor(state)10                                                        
## as.factor(state)11                                                        
## as.factor(state)12                                                        
## as.factor(state)13                                                        
## as.factor(state)14                                                        
## as.factor(state)15                                                        
## as.factor(state)16                                                        
## as.factor(state)17                                                        
## as.factor(state)18                                                        
## as.factor(state)19                                                        
## as.factor(state)20                                                        
## as.factor(state)21                                                        
## as.factor(state)22                                                        
## as.factor(state)23                                                        
## as.factor(state)24                                                        
## as.factor(state)25  0.505                                                 
## as.factor(state)26  0.504  0.503                                          
## as.factor(state)27  0.496  0.499  0.499                                   
## as.factor(state)28  0.500  0.501  0.501  0.500                            
## as.factor(state)29  0.502  0.502  0.502  0.500  0.501                     
## as.factor(state)30  0.498  0.500  0.500  0.500  0.500  0.500              
## as.factor(state)31  0.497  0.500  0.500  0.500  0.500  0.500  0.500       
## as.factor(state)32  0.502  0.502  0.502  0.500  0.501  0.502  0.500  0.500
## as.factor(state)33  0.506  0.504  0.503  0.498  0.501  0.502  0.500  0.499
## as.factor(state)34  0.502  0.502  0.502  0.500  0.501  0.502  0.500  0.500
## as.factor(state)35  0.502  0.502  0.502  0.500  0.501  0.501  0.500  0.500
## as.factor(state)36  0.504  0.503  0.503  0.499  0.501  0.502  0.500  0.500
## as.factor(state)37  0.507  0.504  0.504  0.498  0.501  0.503  0.499  0.499
## as.factor(state)38  0.500  0.501  0.501  0.500  0.501  0.501  0.500  0.500
## as.factor(state)39  0.499  0.501  0.501  0.500  0.501  0.501  0.500  0.500
## as.factor(state)40  0.504  0.503  0.503  0.499  0.501  0.502  0.500  0.500
## as.factor(state)41  0.499  0.501  0.501  0.500  0.501  0.501  0.500  0.500
## as.factor(state)42  0.503  0.503  0.502  0.499  0.501  0.502  0.500  0.500
## as.factor(state)43  0.500  0.501  0.501  0.500  0.501  0.501  0.500  0.500
## as.factor(state)44  0.504  0.503  0.503  0.499  0.501  0.502  0.500  0.500
## as.factor(state)45  0.502  0.502  0.502  0.500  0.501  0.502  0.500  0.500
## as.factor(state)46  0.499  0.501  0.501  0.500  0.501  0.501  0.500  0.500
## as.factor(state)47  0.508  0.504  0.504  0.498  0.501  0.503  0.499  0.499
## as.factor(state)48  0.504  0.503  0.503  0.499  0.501  0.502  0.500  0.500
## as.factor(state)49  0.505  0.503  0.503  0.499  0.501  0.502  0.500  0.500
## as.factor(state)50  0.501  0.502  0.501  0.500  0.501  0.501  0.500  0.500
##                    a.()32 a.()33 a.()34 a.()35 a.()36 a.()37 a.()38 a.()39
## x                                                                         
## as.factor(state)2                                                         
## as.factor(state)3                                                         
## as.factor(state)4                                                         
## as.factor(state)5                                                         
## as.factor(state)6                                                         
## as.factor(state)7                                                         
## as.factor(state)8                                                         
## as.factor(state)9                                                         
## as.factor(state)10                                                        
## as.factor(state)11                                                        
## as.factor(state)12                                                        
## as.factor(state)13                                                        
## as.factor(state)14                                                        
## as.factor(state)15                                                        
## as.factor(state)16                                                        
## as.factor(state)17                                                        
## as.factor(state)18                                                        
## as.factor(state)19                                                        
## as.factor(state)20                                                        
## as.factor(state)21                                                        
## as.factor(state)22                                                        
## as.factor(state)23                                                        
## as.factor(state)24                                                        
## as.factor(state)25                                                        
## as.factor(state)26                                                        
## as.factor(state)27                                                        
## as.factor(state)28                                                        
## as.factor(state)29                                                        
## as.factor(state)30                                                        
## as.factor(state)31                                                        
## as.factor(state)32                                                        
## as.factor(state)33  0.502                                                 
## as.factor(state)34  0.501  0.502                                          
## as.factor(state)35  0.501  0.502  0.501                                   
## as.factor(state)36  0.502  0.503  0.502  0.502                            
## as.factor(state)37  0.502  0.505  0.502  0.502  0.504                     
## as.factor(state)38  0.501  0.501  0.501  0.501  0.501  0.501              
## as.factor(state)39  0.501  0.501  0.501  0.501  0.501  0.500  0.501       
## as.factor(state)40  0.502  0.503  0.502  0.502  0.503  0.503  0.501  0.501
## as.factor(state)41  0.501  0.501  0.501  0.501  0.501  0.500  0.501  0.501
## as.factor(state)42  0.502  0.503  0.502  0.502  0.502  0.503  0.501  0.501
## as.factor(state)43  0.501  0.501  0.501  0.501  0.501  0.501  0.501  0.501
## as.factor(state)44  0.502  0.503  0.502  0.502  0.503  0.503  0.501  0.501
## as.factor(state)45  0.502  0.502  0.501  0.501  0.502  0.502  0.501  0.501
## as.factor(state)46  0.501  0.500  0.501  0.501  0.501  0.500  0.501  0.500
## as.factor(state)47  0.502  0.505  0.502  0.502  0.504  0.506  0.501  0.500
## as.factor(state)48  0.502  0.503  0.502  0.502  0.503  0.504  0.501  0.501
## as.factor(state)49  0.502  0.504  0.502  0.502  0.503  0.504  0.501  0.501
## as.factor(state)50  0.501  0.502  0.501  0.501  0.501  0.501  0.501  0.501
##                    a.()40 a.()41 a.()42 a.()43 a.()44 a.()45 a.()46 a.()47
## x                                                                         
## as.factor(state)2                                                         
## as.factor(state)3                                                         
## as.factor(state)4                                                         
## as.factor(state)5                                                         
## as.factor(state)6                                                         
## as.factor(state)7                                                         
## as.factor(state)8                                                         
## as.factor(state)9                                                         
## as.factor(state)10                                                        
## as.factor(state)11                                                        
## as.factor(state)12                                                        
## as.factor(state)13                                                        
## as.factor(state)14                                                        
## as.factor(state)15                                                        
## as.factor(state)16                                                        
## as.factor(state)17                                                        
## as.factor(state)18                                                        
## as.factor(state)19                                                        
## as.factor(state)20                                                        
## as.factor(state)21                                                        
## as.factor(state)22                                                        
## as.factor(state)23                                                        
## as.factor(state)24                                                        
## as.factor(state)25                                                        
## as.factor(state)26                                                        
## as.factor(state)27                                                        
## as.factor(state)28                                                        
## as.factor(state)29                                                        
## as.factor(state)30                                                        
## as.factor(state)31                                                        
## as.factor(state)32                                                        
## as.factor(state)33                                                        
## as.factor(state)34                                                        
## as.factor(state)35                                                        
## as.factor(state)36                                                        
## as.factor(state)37                                                        
## as.factor(state)38                                                        
## as.factor(state)39                                                        
## as.factor(state)40                                                        
## as.factor(state)41  0.501                                                 
## as.factor(state)42  0.502  0.501                                          
## as.factor(state)43  0.501  0.501  0.501                                   
## as.factor(state)44  0.502  0.501  0.502  0.501                            
## as.factor(state)45  0.502  0.501  0.502  0.501  0.502                     
## as.factor(state)46  0.501  0.500  0.501  0.501  0.501  0.501              
## as.factor(state)47  0.503  0.500  0.503  0.501  0.503  0.502  0.500       
## as.factor(state)48  0.503  0.501  0.502  0.501  0.503  0.502  0.501  0.504
## as.factor(state)49  0.503  0.501  0.503  0.501  0.503  0.502  0.501  0.504
## as.factor(state)50  0.501  0.501  0.501  0.501  0.501  0.501  0.501  0.501
##                    a.()48 a.()49
## x                               
## as.factor(state)2               
## as.factor(state)3               
## as.factor(state)4               
## as.factor(state)5               
## as.factor(state)6               
## as.factor(state)7               
## as.factor(state)8               
## as.factor(state)9               
## as.factor(state)10              
## as.factor(state)11              
## as.factor(state)12              
## as.factor(state)13              
## as.factor(state)14              
## as.factor(state)15              
## as.factor(state)16              
## as.factor(state)17              
## as.factor(state)18              
## as.factor(state)19              
## as.factor(state)20              
## as.factor(state)21              
## as.factor(state)22              
## as.factor(state)23              
## as.factor(state)24              
## as.factor(state)25              
## as.factor(state)26              
## as.factor(state)27              
## as.factor(state)28              
## as.factor(state)29              
## as.factor(state)30              
## as.factor(state)31              
## as.factor(state)32              
## as.factor(state)33              
## as.factor(state)34              
## as.factor(state)35              
## as.factor(state)36              
## as.factor(state)37              
## as.factor(state)38              
## as.factor(state)39              
## as.factor(state)40              
## as.factor(state)41              
## as.factor(state)42              
## as.factor(state)43              
## as.factor(state)44              
## as.factor(state)45              
## as.factor(state)46              
## as.factor(state)47              
## as.factor(state)48              
## as.factor(state)49  0.503       
## as.factor(state)50  0.501  0.502
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -2.25641574 -0.74675763 -0.05429884  0.77045194  2.28249545 
## 
## Residual standard error: 13.97811 
## Degrees of freedom: 500 total; 449 residual

pggls is a function for the estimation of linear panel models by general feasible generalized least squares, either with or without fixed effects. General FGLS is based on a two-step estimation process: first a model is estimated by OLS (pooling), fixed effects (within) or first differences (fd), then its residuals are used to estimate an error covariance matrix for use in a feasible-GLS analysis. This framework allows the error covariance structure inside every group (if effect="individual", else symmetric) of observations to be fully unrestricted and is therefore robust against any type of intragroup heteroskedasticity and serial correlation. Conversely, this structure is assumed identical across groups and thus general FGLS estimation is inefficient under groupwise heteroskedasticity. Note also that this method requires estimation of T(T+1)/2 variance parameters, thus efficiency requires N > > T (if effect="individual", else the opposite). The model="random" and model="pooling" arguments both produce an unrestricted FGLS model as in Wooldridge, Ch. 10, although the former is deprecated and included only for retro–compatibility reasons. If model="within" (the default) then a FEGLS (fixed effects GLS, see ibid.) is estimated; if model="fd" a FDGLS (first-difference GLS).

#Running a model that allows for a serial correlation structure and heteroscedasticity within panel 
mod.pggls <- plm::pggls(y~x, data=state, 
                        index=c("state","time") ,model="random")
## Warning: 'random' argument to pggls() has been renamed as 'pooling'
summary(mod.pggls)
##  Random effects model
## 
## Call:
## plm::pggls(formula = y ~ x, data = state, model = "random", index = c("state", 
##     "time"))
## 
## Balanced Panel: n = 50, T = 10, N = 500
## 
## Residuals:
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -26.73069 -11.89724  -0.27186   0.09875  12.98997  24.66032 
## 
## Coefficients:
##              Estimate Std. Error z-value              Pr(>|z|)    
## (Intercept) 50.670251   0.571413  88.675 < 0.00000000000000022 ***
## x            1.969652   0.020175  97.631 < 0.00000000000000022 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Total Sum of Squares: 1619500
## Residual Sum of Squares: 99177
## Multiple R-squared: 0.93876
#Running a fixed effects model and adding panel corrected standard errors
fe.lm.model <- lm(y~x+as.factor(state), data=state)
model.pcse<-pcse::pcse(fe.lm.model, groupN=state$state, groupT=state$time)

summary(model.pcse)
## 
##  Results: 
##  
##                     Estimate       PCSE    t value      Pr(>|t|)
## (Intercept)        38.505011 3.55907749 10.8188180  2.128884e-24
## x                   1.969086 0.02065518 95.3313354 4.332198e-300
## as.factor(state)2  15.093063 5.51996795  2.7342664  6.499183e-03
## as.factor(state)3   6.047563 6.49432565  0.9312073  3.522468e-01
## as.factor(state)4  12.334444 4.31250492  2.8601576  4.431746e-03
## as.factor(state)5   9.088581 5.42321933  1.6758646  9.446088e-02
## as.factor(state)6  12.887020 3.94443903  3.2671363  1.169729e-03
## as.factor(state)7  16.809425 5.20910112  3.2269339  1.342794e-03
## as.factor(state)8  18.706544 5.83644855  3.2051245  1.446316e-03
## as.factor(state)9   8.087048 3.84271245  2.1045155  3.588875e-02
## as.factor(state)10 11.274579 6.84842895  1.6463015  1.004014e-01
## as.factor(state)11 14.358481 6.54058436  2.1952903  2.865388e-02
## as.factor(state)12 15.713256 7.32555050  2.1449932  3.248928e-02
## as.factor(state)13  9.465211 7.18857771  1.3167015  1.886104e-01
## as.factor(state)14 10.229855 6.25030155  1.6366978  1.023943e-01
## as.factor(state)15 20.404478 6.02422296  3.3870722  7.686010e-04
## as.factor(state)16 15.849900 5.65429941  2.8031589  5.279629e-03
## as.factor(state)17 18.154672 5.03196662  3.6078682  3.434189e-04
## as.factor(state)18 12.151837 6.97133310  1.7431152  8.199803e-02
## as.factor(state)19 14.300434 4.13258091  3.4604123  5.908967e-04
## as.factor(state)20 11.019150 4.47424689  2.4627943  1.416024e-02
## as.factor(state)21 13.766580 5.42266099  2.5387130  1.146274e-02
## as.factor(state)22 15.883072 4.21217712  3.7707512  1.845164e-04
## as.factor(state)23  8.351756 6.76223224  1.2350590  2.174544e-01
## as.factor(state)24  6.039102 6.11376657  0.9877874  3.237888e-01
## as.factor(state)25 12.809281 5.78944141  2.2125245  2.743290e-02
## as.factor(state)26 14.599543 4.63303650  3.1511824  1.734874e-03
## as.factor(state)27  7.816195 4.46469606  1.7506669  8.068625e-02
## as.factor(state)28 16.990049 7.38705312  2.2999766  2.190738e-02
## as.factor(state)29 11.107793 3.18189762  3.4909335  5.289274e-04
## as.factor(state)30 14.157462 4.62380734  3.0618625  2.331717e-03
## as.factor(state)31 17.503316 5.77288179  3.0319893  2.570104e-03
## as.factor(state)32 15.085383 6.17979948  2.4410797  1.502876e-02
## as.factor(state)33 10.766089 4.28800292  2.5107467  1.239798e-02
## as.factor(state)34  5.673854 5.46304993  1.0385872  2.995557e-01
## as.factor(state)35 17.029900 5.23530361  3.2528964  1.228515e-03
## as.factor(state)36 11.254995 5.62186374  2.0020042  4.588531e-02
## as.factor(state)37 10.731645 5.38830270  1.9916559  4.701416e-02
## as.factor(state)38  6.400210 5.17350942  1.2371119  2.166922e-01
## as.factor(state)39 13.346846 5.54482367  2.4070821  1.648323e-02
## as.factor(state)40 21.817332 4.66211220  4.6797098  3.808955e-06
## as.factor(state)41 19.631490 5.80715752  3.3805678  7.865578e-04
## as.factor(state)42 15.394579 3.22596032  4.7720920  2.470153e-06
## as.factor(state)43  4.502578 5.87845353  0.7659460  4.441109e-01
## as.factor(state)44  8.788492 4.95970394  1.7719792  7.707614e-02
## as.factor(state)45  3.917253 5.82686939  0.6722741  5.017550e-01
## as.factor(state)46  6.922103 4.88756789  1.4162674  1.573906e-01
## as.factor(state)47 11.188698 3.32536244  3.3646552  8.321487e-04
## as.factor(state)48 15.500484 6.25688867  2.4773469  1.360333e-02
## as.factor(state)49  7.193272 5.32479091  1.3509023  1.774071e-01
## as.factor(state)50 17.054444 4.23306715  4.0288622  6.581459e-05
## 
##  --------------------------------------------- 
##  
## # Valid Obs = 500; # Missing Obs = 0; Degrees of Freedom = 449.
### HOW TO RUN A PANEL MODEL WITH A LAGGED DEPENDENT VARIABLE (IF ONE ISN'T ALREADY CODED) ###
state$time <- as.numeric(state$time)
state$state <- as.numeric(state$state)
state$timelag <- state$time - 1

deps <- as.data.frame(cbind(state$state, state$time, state$y, state$yar1))

names(deps) <- c("state", "time", "yLag", "yar1lag")

state2 <- merge(x=state, y=deps, 
                by.x=c("timelag", "state"),
                by.y=c("time", "state"))
#cbind(state2$state, state2$time, state2$y, state2$yLag)

#OLS with LDV
mod.lagged <- lm(y~yLag+x, data=state2); summary(mod.lagged)
## 
## Call:
## lm(formula = y ~ yLag + x, data = state2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -27.3371 -12.2458  -0.1952  13.3304  24.5674 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 51.21601    0.90315   56.71   <2e-16 ***
## yLag        -0.00426    0.01184   -0.36    0.719    
## x            1.96293    0.02431   80.73   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.32 on 447 degrees of freedom
## Multiple R-squared:  0.9359, Adjusted R-squared:  0.9356 
## F-statistic:  3264 on 2 and 447 DF,  p-value: < 0.00000000000000022
lag.pcse<-pcse(mod.lagged,groupN=state2$state,groupT=state2$time)
summary(lag.pcse)
## 
##  Results: 
##  
##                Estimate       PCSE   t value      Pr(>|t|)
## (Intercept) 51.21601275 0.92731493 55.230441 8.369615e-202
## yLag        -0.00426034 0.01335459 -0.319017  7.498626e-01
## x            1.96292640 0.02404533 81.634405 1.060222e-270
## 
##  --------------------------------------------- 
##  
## # Valid Obs = 450; # Missing Obs = 0; Degrees of Freedom = 447.
### INTERPRETING DYNAMICS ###
# PULSE INPUT
lag.coef<-mod.lagged$coefficients[2]
#lag.coef<-.5 #hypothetical alternative, to illustrate a positive spillover
input.coef<-mod.lagged$coefficients[3]

times<-c(0:10)
pred.pulse<-input.coef*lag.coef^times
plot(y=c(0,pred.pulse),x=c(-1,times),type='l')

# STEP INPUT
pred.step<-cumsum(pred.pulse)
plot(y=c(0,pred.step),x=c(-1,times),type='l')

###UNIT ROOT TEST###
#purtest implements several testing procedures that have been 
#proposed to test unit root hypotheses with panel data.
#purtest(y ~ 1, data = state, index = "state", pmax=8, test = "levinlin")
#purtest(y ~ 1, data = state, index = "state", pmax=8, test = "ips")
#purtest(y ~ 1, data = state, index = "state", pmax=8, test = "madwu")
plm::purtest(y ~ 1, data = state, index = "state", pmax=8, test = "hadri")
## Warning in pdata.frame(data, index): column 'time' overwritten by time
## index
## 
##  Hadri Test (ex. var.: Individual Intercepts) (Heterosked.
##  Consistent)
## 
## data:  y ~ 1
## z = 1.0373, p-value = 0.1498
## alternative hypothesis: at least one series has a unit root
#################################################################

0.2.7 Marginal Models: Generalized Estimating Equations

#clean up
rm(list=ls())

#Load libraries
library(geepack)
library(reshape)
library(nlme)
library(car)
library(MuMIn)
library(multgee)
## Loading required package: gnm
## 
## Attaching package: 'gnm'
## The following object is masked from 'package:faraway':
## 
##     wheat
## The following object is masked from 'package:lattice':
## 
##     barley
## Loading required package: VGAM
## Loading required package: stats4
## Loading required package: splines
## 
## Attaching package: 'VGAM'
## The following object is masked from 'package:MuMIn':
## 
##     AICc
## The following object is masked from 'package:plm':
## 
##     has.intercept
## The following objects are masked from 'package:faraway':
## 
##     hormone, logit, pneumo, prplot
## The following object is masked from 'package:car':
## 
##     logit
## The following object is masked from 'package:lmtest':
## 
##     lrtest
###BINOMIAL EXAMPLE###
#muscatine<-read.table(file.choose(), header=TRUE, sep="")
muscatine.0<-read.table("muscatine.txt",header=TRUE,sep="")

#turn obesity into a numeric variable
muscatine.0$obese[muscatine.0$obese=="."]<-NA
muscatine.0$obese<-as.numeric(muscatine.0$obese==1)
muscatine<-na.omit(muscatine.0)

#sort the data!
muscatine<-with(muscatine,muscatine[order(id,cAge),])

#Longer Model
long.mod<-geepack::geeglm(obese~gender+I(cAge-12)+I((cAge-12)^2)+
                            gender:I(cAge-12)+gender:I((cAge-12)^2), 
                          id=id, waves=muscatine$occ, 
                          family=binomial(link="logit"), 
                          data=muscatine, scale.fix=TRUE,
                          corstr="exchangeable")
summary(long.mod)
## 
## Call:
## geepack::geeglm(formula = obese ~ gender + I(cAge - 12) + I((cAge - 
##     12)^2) + gender:I(cAge - 12) + gender:I((cAge - 12)^2), family = binomial(link = "logit"), 
##     data = muscatine, id = id, waves = muscatine$occ, corstr = "exchangeable", 
##     scale.fix = TRUE)
## 
##  Coefficients:
##                          Estimate   Std.err    Wald    Pr(>|W|)    
## (Intercept)             -1.211788  0.050551 574.645     < 2e-16 ***
## gender                   0.117138  0.071103   2.714      0.0995 .  
## I(cAge - 12)             0.038026  0.013351   8.111      0.0044 ** 
## I((cAge - 12)^2)        -0.017829  0.003391  27.652 0.000000145 ***
## gender:I(cAge - 12)      0.006873  0.018270   0.142      0.7068    
## gender:I((cAge - 12)^2)  0.004137  0.004631   0.798      0.3716    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Scale is fixed.
## 
## Correlation: Structure = exchangeable  Link = identity 
## 
## Estimated Correlation Parameters:
##       Estimate Std.err
## alpha    0.539 0.01564
## Number of clusters:   4856   Maximum cluster size: 3
#Normally scale.fix=FALSE
#'"independence"', '"exchangeable"', '"ar1"', '"unstructured"' and '"userdefined"'
#Could only get AR(1) and indepdendence to work.

#shorter model without interactions
short.mod<-update(long.mod,.~.-gender:I(cAge-12)-gender:I((cAge-12)^2))
summary(short.mod)
## 
## Call:
## geepack::geeglm(formula = obese ~ gender + I(cAge - 12) + I((cAge - 
##     12)^2), family = binomial(link = "logit"), data = muscatine, 
##     id = id, waves = muscatine$occ, corstr = "exchangeable", 
##     scale.fix = TRUE)
## 
##  Coefficients:
##                  Estimate  Std.err  Wald        Pr(>|W|)    
## (Intercept)      -1.22695  0.04770 661.6         < 2e-16 ***
## gender            0.14705  0.06271   5.5           0.019 *  
## I(cAge - 12)      0.04167  0.00910  20.9 0.0000047164240 ***
## I((cAge - 12)^2) -0.01570  0.00231  46.4 0.0000000000099 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Scale is fixed.
## 
## Correlation: Structure = exchangeable  Link = identity 
## 
## Estimated Correlation Parameters:
##       Estimate Std.err
## alpha    0.539  0.0156
## Number of clusters:   4856   Maximum cluster size: 3
#Wald test for whether trajectories differ
anova(long.mod,short.mod)
## Analysis of 'Wald statistic' Table
## 
## Model 1 obese ~ gender + I(cAge - 12) + I((cAge - 12)^2) + gender:I(cAge - 12) + gender:I((cAge - 12)^2) 
## Model 2 obese ~ gender + I(cAge - 12) + I((cAge - 12)^2)
##   Df    X2 P(>|Chi|)
## 1  2 0.952      0.62
#short model with AR(1) instead
short.mod.2<-geeglm(obese~gender+I(cAge-12)+I((cAge-12)^2), 
                    id=id, waves=muscatine$occ, 
                    family=binomial(link="logit"), 
                    data=muscatine, scale.fix=TRUE,
                    corstr="ar1")
summary(short.mod.2)
## 
## Call:
## geeglm(formula = obese ~ gender + I(cAge - 12) + I((cAge - 12)^2), 
##     family = binomial(link = "logit"), data = muscatine, id = id, 
##     waves = muscatine$occ, corstr = "ar1", scale.fix = TRUE)
## 
##  Coefficients:
##                  Estimate  Std.err   Wald        Pr(>|W|)    
## (Intercept)      -1.21953  0.04784 649.76         < 2e-16 ***
## gender            0.13147  0.06287   4.37           0.037 *  
## I(cAge - 12)      0.04108  0.00923  19.80 0.0000085928760 ***
## I((cAge - 12)^2) -0.01617  0.00235  47.41 0.0000000000058 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Scale is fixed.
## 
## Correlation: Structure = ar1  Link = identity 
## 
## Estimated Correlation Parameters:
##       Estimate Std.err
## alpha    0.614  0.0145
## Number of clusters:   4856   Maximum cluster size: 3
#compare models with fit statistics
model.sel(long.mod,short.mod,short.mod.2,rank=QIC)
## Model selection table 
##             (Int) (cAg-12)^2 cAg-12   gnd gnd:I((cAg-12)^2) gnd:I(cAg-12)
## short.mod   -1.23    -0.0157 0.0417 0.147                                
## short.mod.2 -1.22    -0.0162 0.0411 0.132                                
## long.mod    -1.21    -0.0178 0.0380 0.117           0.00414       0.00687
##             corstr  qLik   QIC delta weight
## short.mod   exchng -5094 10196  0.00  0.466
## short.mod.2    ar1 -5094 10196  0.15  0.433
## long.mod    exchng -5094 10199  3.07  0.101
## Abbreviations:
## corstr: exchng = 'exchangeable'
## Models ranked by QIC(x)
###COUNT EXAMPLE###
#clean up
rm(list=ls())

#data
#leprosy<-read.table(file.choose(), header=TRUE, sep="")
leprosy<-read.table("leprosy.txt", header=TRUE, sep="")

#create id variable
leprosy$id<-c(1:nrow(leprosy))

#relevel treatment so that Placebo is the reference
leprosy$drug<-relevel(leprosy$drug,"C")

#create binary variable for whether an antibiotic was administered
leprosy$antibiotic<-1-as.numeric(leprosy$drug=="C")

#reshape data
m.leprosy<-melt.data.frame(data=leprosy, measure.vars=c("pre","post"), id=c("id","drug","antibiotic"))

#create time variable
m.leprosy$time<-as.numeric(m.leprosy$variable=="post")

#create inputs
m.leprosy$a<-as.numeric(m.leprosy$time==1 & m.leprosy$drug=="A")
m.leprosy$b<-as.numeric(m.leprosy$time==1 & m.leprosy$drug=="B")
m.leprosy$treat<-as.numeric(m.leprosy$time==1 & m.leprosy$antibiotic==1)

#sort the data
m.leprosy<-with(m.leprosy,m.leprosy[order(id,time),])

#Three Treatment Model
mod.3<-geeglm(value~time+a+b, id=id, 
              waves=m.leprosy$time, 
              family=poisson(link="log"), 
              data=m.leprosy,corstr="exchangeable")
summary(mod.3)
## 
## Call:
## geeglm(formula = value ~ time + a + b, family = poisson(link = "log"), 
##     data = m.leprosy, id = id, waves = m.leprosy$time, corstr = "exchangeable")
## 
##  Coefficients:
##             Estimate  Std.err   Wald Pr(>|W|)    
## (Intercept)  2.37335  0.08014 877.10   <2e-16 ***
## time        -0.00288  0.15701   0.00    0.985    
## a           -0.56257  0.22198   6.42    0.011 *  
## b           -0.49528  0.23420   4.47    0.034 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Estimated Scale Parameters:
##             Estimate Std.err
## (Intercept)     3.21     0.5
## 
## Correlation: Structure = exchangeable  Link = identity 
## 
## Estimated Correlation Parameters:
##       Estimate Std.err
## alpha    0.738  0.0815
## Number of clusters:   30   Maximum cluster size: 2
#Wald test for whether treated patients differed from placebo patients overall
mod.3.alt<-update(mod.3,.~.-a-b); summary(mod.3.alt)
## 
## Call:
## geeglm(formula = value ~ time, family = poisson(link = "log"), 
##     data = m.leprosy, id = id, waves = m.leprosy$time, corstr = "exchangeable")
## 
##  Coefficients:
##             Estimate Std.err   Wald Pr(>|W|)    
## (Intercept)   2.3734  0.0801 877.10   <2e-16 ***
## time         -0.3065  0.1010   9.21   0.0024 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Estimated Scale Parameters:
##             Estimate Std.err
## (Intercept)     3.75   0.625
## 
## Correlation: Structure = exchangeable  Link = identity 
## 
## Estimated Correlation Parameters:
##       Estimate Std.err
## alpha    0.705   0.094
## Number of clusters:   30   Maximum cluster size: 2
anova(mod.3,mod.3.alt)
## Analysis of 'Wald statistic' Table
## 
## Model 1 value ~ time + a + b 
## Model 2 value ~ time
##   Df   X2 P(>|Chi|)  
## 1  2 7.34     0.025 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Two Treatment Model
mod.2<-geeglm(value~time+treat, id=id, 
              waves=m.leprosy$time, 
              family=poisson(link="log"), 
              data=m.leprosy,corstr="exchangeable")
summary(mod.2)
## 
## Call:
## geeglm(formula = value ~ time + treat, family = poisson(link = "log"), 
##     data = m.leprosy, id = id, waves = m.leprosy$time, corstr = "exchangeable")
## 
##  Coefficients:
##             Estimate  Std.err   Wald Pr(>|W|)    
## (Intercept)  2.37335  0.08014 877.10   <2e-16 ***
## time        -0.00286  0.15700   0.00   0.9855    
## treat       -0.52783  0.19883   7.05   0.0079 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Estimated Scale Parameters:
##             Estimate Std.err
## (Intercept)     3.23    0.52
## 
## Correlation: Structure = exchangeable  Link = identity 
## 
## Estimated Correlation Parameters:
##       Estimate Std.err
## alpha    0.738   0.081
## Number of clusters:   30   Maximum cluster size: 2
#compare models with fit statistics
model.sel(mod.2,mod.3,rank=QIC)
## Model selection table 
##       (Intrc)     time  treat      a      b qLik   QIC delta weight
## mod.2    2.37 -0.00286 -0.528                711 -1419   0.0   0.55
## mod.3    2.37 -0.00288        -0.563 -0.495  711 -1419   0.4   0.45
## Models ranked by QIC(x)
###ORDINAL AND NOMINAL USING "multgee"###
rm(list=ls())

#ORDINAL#
data(arthritis)
alt.fitmod <- ordLORgee(ordered(-y)~sqrt(time)*factor(trt),
                        data=arthritis,id=id,
                        LORstr="time.exch",
                        repeated=time)
summary(alt.fitmod)
## GEE FOR ORDINAL MULTINOMIAL RESPONSES 
## version 1.6.0 modified 2017-07-10 
## 
## Link : Cumulative logit 
## 
## Local Odds Ratios:
## Structure:         time.exch
## Model:             3way
## Homogenous scores: TRUE
## 
## call:
## ordLORgee(formula = ordered(-y) ~ sqrt(time) * factor(trt), data = arthritis, 
##     id = id, repeated = time, LORstr = "time.exch")
## 
## Summary of residuals:
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  -0.411  -0.273  -0.132   0.000  -0.061   0.939 
## 
## Number of Iterations: 4 
## 
## Coefficients:
##                         Estimate  san.se  san.z Pr(>|san.z|)    
## beta10                   -2.8370  0.2570 -11.04      < 2e-16 ***
## beta20                   -0.8938  0.2268  -3.94      0.00008 ***
## beta30                    0.8552  0.2233   3.83      0.00013 ***
## beta40                    2.8445  0.2838  10.02      < 2e-16 ***
## sqrt(time)                0.1086  0.1110   0.98      0.32794    
## factor(trt)2              0.0441  0.2948   0.15      0.88107    
## sqrt(time):factor(trt)2   0.2978  0.1606   1.85      0.06368 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Local Odds Ratios Estimates:
##       [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
##  [1,] 0.00 0.00 0.00 0.00 3.18 2.87 2.63 1.36 3.18  2.87  2.63  1.36
##  [2,] 0.00 0.00 0.00 0.00 2.87 2.61 2.42 1.33 2.87  2.61  2.42  1.33
##  [3,] 0.00 0.00 0.00 0.00 2.63 2.42 2.25 1.30 2.63  2.42  2.25  1.30
##  [4,] 0.00 0.00 0.00 0.00 1.36 1.33 1.30 1.09 1.36  1.33  1.30  1.09
##  [5,] 3.18 2.87 2.63 1.36 0.00 0.00 0.00 0.00 3.18  2.87  2.63  1.36
##  [6,] 2.87 2.61 2.42 1.33 0.00 0.00 0.00 0.00 2.87  2.61  2.42  1.33
##  [7,] 2.63 2.42 2.25 1.30 0.00 0.00 0.00 0.00 2.63  2.42  2.25  1.30
##  [8,] 1.36 1.33 1.30 1.09 0.00 0.00 0.00 0.00 1.36  1.33  1.30  1.09
##  [9,] 3.18 2.87 2.63 1.36 3.18 2.87 2.63 1.36 0.00  0.00  0.00  0.00
## [10,] 2.87 2.61 2.42 1.33 2.87 2.61 2.42 1.33 0.00  0.00  0.00  0.00
## [11,] 2.63 2.42 2.25 1.30 2.63 2.42 2.25 1.30 0.00  0.00  0.00  0.00
## [12,] 1.36 1.33 1.30 1.09 1.36 1.33 1.30 1.09 0.00  0.00  0.00  0.00
## 
## pvalue of Null model: 0.000243
#NOMINAL#
#The largest group becomes the baseline. Our largest group is independent housing.
#y: 0=street living, 1=community living, 2=independent living
#Coefficient set 1 refers to street living against independent living.
#Coefficient set 2 refers to community living against independent living.
data(housing)
house.fitmod <- nomLORgee(y~factor(time)*sec,
                          data=housing,id=id, 
                          repeated=time, 
                          LORstr="time.exch")
summary(house.fitmod)
## GEE FOR NOMINAL MULTINOMIAL RESPONSES 
## version 1.6.0 modified 2017-07-10 
## 
## Link : Baseline Category Logit 
## 
## Local Odds Ratios:
## Structure:         time.exch
## Model:             3way
## Homogenous scores: TRUE
## 
## call:
## nomLORgee(formula = y ~ factor(time) * sec, data = housing, id = id, 
##     repeated = time, LORstr = "time.exch")
## 
## Summary of residuals:
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  -0.586  -0.278  -0.129  -0.002   0.421   0.909 
## 
## Number of Iterations: 3 
## 
## Coefficients:
##                      Estimate  san.se san.z Pr(>|san.z|)    
## beta10                 1.6607  0.2503  6.64      < 2e-16 ***
## factor(time)6:1       -1.8701  0.3188 -5.87      < 2e-16 ***
## factor(time)12:1      -2.9251  0.3683 -7.94      < 2e-16 ***
## factor(time)24:1      -2.8136  0.3426 -8.21      < 2e-16 ***
## sec:1                 -0.5368  0.3370 -1.59       0.1112    
## factor(time)6:sec:1   -1.1822  0.4604 -2.57       0.0102 *  
## factor(time)12:sec:1   0.0792  0.4831  0.16       0.8698    
## factor(time)24:sec:1   0.0327  0.4656  0.07       0.9440    
## beta20                 1.1664  0.2627  4.44      0.00001 ***
## factor(time)6:2       -0.2545  0.3008 -0.85       0.3974    
## factor(time)12:2      -0.5705  0.3118 -1.83       0.0673 .  
## factor(time)24:2      -1.0410  0.3072 -3.39       0.0007 ***
## sec:2                 -0.1070  0.3476 -0.31       0.7581    
## factor(time)6:sec:2   -1.6234  0.4135 -3.93      0.00009 ***
## factor(time)12:sec:2  -2.0485  0.4454 -4.60      < 2e-16 ***
## factor(time)24:sec:2  -1.0496  0.4183 -2.51       0.0121 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Local Odds Ratios Estimates:
##      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
## [1,] 0.00 0.00 1.36 1.88 1.36 1.88 1.36 1.88
## [2,] 0.00 0.00 1.88 3.69 1.88 3.69 1.88 3.69
## [3,] 1.36 1.88 0.00 0.00 1.36 1.88 1.36 1.88
## [4,] 1.88 3.69 0.00 0.00 1.88 3.69 1.88 3.69
## [5,] 1.36 1.88 1.36 1.88 0.00 0.00 1.36 1.88
## [6,] 1.88 3.69 1.88 3.69 0.00 0.00 1.88 3.69
## [7,] 1.36 1.88 1.36 1.88 1.36 1.88 0.00 0.00
## [8,] 1.88 3.69 1.88 3.69 1.88 3.69 0.00 0.00
## 
## pvalue of Null model: <0.0001
###########################################################################

0.2.8 Generalized Linear Mixed Effects Models

#clean up
rm(list=ls())

#Load libraries
library(lme4)
library(reshape)
library(car)

###BINOMIAL EXAMPLE: OBESITY###
#muscatine<-read.table(file.choose(), header=TRUE, sep="")
muscatine.0<-read.table("muscatine.txt",header=TRUE,sep="")

#turn obesity into a numeric variable
muscatine.0$obese[muscatine.0$obese=="."]<-NA
muscatine.0$obese<-as.numeric(muscatine.0$obese==1)
muscatine<-na.omit(muscatine.0)

#sort the data!
muscatine<-with(muscatine,muscatine[order(id,cAge),])

#Longer Model
long.mod<-glmer(obese~gender+I(cAge-12)+I((cAge-12)^2)+
                  gender:I(cAge-12)+gender:I((cAge-12)^2)+(1|id), 
                family=binomial(link="logit"), data=muscatine)
summary(long.mod)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## obese ~ gender + I(cAge - 12) + I((cAge - 12)^2) + gender:I(cAge -  
##     12) + gender:I((cAge - 12)^2) + (1 | id)
##    Data: muscatine
## 
##      AIC      BIC   logLik deviance df.resid 
##     7926     7977    -3956     7912     9849 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.7362 -0.0203 -0.0156 -0.0088  2.9493 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  id     (Intercept) 143      11.9    
## Number of obs: 9856, groups:  id, 4856
## 
## Fixed effects:
##                         Estimate Std. Error z value      Pr(>|z|)    
## (Intercept)              -7.7170     0.2088  -36.96       < 2e-16 ***
## gender                    0.0457     0.2202    0.21       0.83558    
## I(cAge - 12)              0.1416     0.0426    3.32       0.00089 ***
## I((cAge - 12)^2)         -0.0673     0.0105   -6.44 0.00000000012 ***
## gender:I(cAge - 12)       0.0408     0.0593    0.69       0.49124    
## gender:I((cAge - 12)^2)   0.0184     0.0143    1.28       0.19884    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) gender I(A-12 I((A-1 g:I(A-1
## gender      -0.506                             
## I(cAge-12)  -0.100  0.040                      
## I((A-12)^2) -0.176  0.247 -0.112               
## gnd:I(A-12)  0.019 -0.076 -0.712  0.071        
## g:I((A-12)^  0.170 -0.360  0.076 -0.721 -0.130
#shorter model without interactions
short.mod<-update(long.mod,.~.-gender:I(cAge-12)-gender:I((cAge-12)^2))
summary(short.mod)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: obese ~ gender + I(cAge - 12) + I((cAge - 12)^2) + (1 | id)
##    Data: muscatine
## 
##      AIC      BIC   logLik deviance df.resid 
##     7925     7961    -3957     7915     9851 
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -2.580 -0.020 -0.016 -0.009  3.173 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  id     (Intercept) 142      11.9    
## Number of obs: 9856, groups:  id, 4856
## 
## Fixed effects:
##                  Estimate Std. Error z value           Pr(>|z|)    
## (Intercept)      -7.77506    0.20609  -37.73            < 2e-16 ***
## gender            0.17181    0.20326    0.85                0.4    
## I(cAge - 12)      0.16287    0.02989    5.45 0.0000000504423559 ***
## I((cAge - 12)^2) -0.05772    0.00723   -7.98 0.0000000000000014 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) gender I(A-12
## gender      -0.490              
## I(cAge-12)  -0.133 -0.008       
## I((A-12)^2) -0.089  0.000 -0.151
#Likelihood ratio test for whether trajectories differ
anova(long.mod,short.mod)
## Data: muscatine
## Models:
## short.mod: obese ~ gender + I(cAge - 12) + I((cAge - 12)^2) + (1 | id)
## long.mod: obese ~ gender + I(cAge - 12) + I((cAge - 12)^2) + gender:I(cAge - 
## long.mod:     12) + gender:I((cAge - 12)^2) + (1 | id)
##           Df  AIC  BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## short.mod  5 7925 7961  -3957     7915                        
## long.mod   7 7926 7977  -3956     7912   2.4      2        0.3
###FIRST COUNT EXAMPLE: EPILEPSY SEIZURES###
#clean up
rm(list=ls())

#data
epilepsy<-read.table("epilepsy.txt", header=TRUE, sep="")

#reshape data
m.epilepsy<-melt.data.frame(data=epilepsy, 
                            measure.vars=c("t0","t1","t2","t3","t4"), 
                            id=c("id","treat","age"))

#create visit variable
m.epilepsy$visit<-as.numeric(substr(m.epilepsy$variable,2,2))

#create dummy for time
m.epilepsy$dummy<-1-as.numeric(m.epilepsy$visit==0)

#create weeks variable
m.epilepsy$weeks<-2*as.numeric(substr(m.epilepsy$variable,2,2))

#rescale seizures variable
m.epilepsy$logT[m.epilepsy$weeks==0]<-log(8)
m.epilepsy$logT[m.epilepsy$weeks!=0]<-log(2)

#Book Model
book.mod<-glmer(value~(dummy|id)+treat+dummy*treat, 
                offset=logT, family=poisson(link="log"), 
                data=m.epilepsy)
summary(book.mod)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: value ~ (dummy | id) + treat + dummy * treat
##    Data: m.epilepsy
##  Offset: logT
## 
##      AIC      BIC   logLik deviance df.resid 
##     1864     1890     -925     1850      288 
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -3.139 -0.707 -0.062  0.514  6.965 
## 
## Random effects:
##  Groups Name        Variance Std.Dev. Corr
##  id     (Intercept) 0.500    0.707        
##         dummy       0.232    0.482    0.16
## Number of obs: 295, groups:  id, 59
## 
## Fixed effects:
##             Estimate Std. Error z value          Pr(>|z|)    
## (Intercept)   1.0708     0.1403    7.63 0.000000000000023 ***
## treat         0.0512     0.1927    0.27             0.790    
## dummy        -0.0005     0.1091    0.00             0.996    
## treat:dummy  -0.3062     0.1504   -2.04             0.042 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) treat  dummy 
## treat       -0.725              
## dummy        0.011 -0.013       
## treat:dummy -0.014  0.025 -0.709
#Outlier Deleted Model
no.outlier<-glmer(value~(dummy|id)+treat+dummy*treat, 
                  offset=logT, family=poisson(link="log"), 
                  subset=id!=49, data=m.epilepsy)
summary(no.outlier)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: value ~ (dummy | id) + treat + dummy * treat
##    Data: m.epilepsy
##  Offset: logT
##  Subset: id != 49
## 
##      AIC      BIC   logLik deviance df.resid 
##     1802     1827     -894     1788      283 
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -3.125 -0.671 -0.057  0.514  6.996 
## 
## Random effects:
##  Groups Name        Variance Std.Dev. Corr
##  id     (Intercept) 0.452    0.672        
##         dummy       0.215    0.464    0.05
## Number of obs: 290, groups:  id, 58
## 
## Fixed effects:
##             Estimate Std. Error z value           Pr(>|z|)    
## (Intercept)  1.06939    0.13407    7.98 0.0000000000000015 ***
## treat       -0.00801    0.18565   -0.04              0.966    
## dummy        0.00777    0.10640    0.07              0.942    
## treat:dummy -0.34588    0.14800   -2.34              0.019 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) treat  dummy 
## treat       -0.719              
## dummy       -0.088  0.057       
## treat:dummy  0.056 -0.073 -0.700
#Weeks Model
weeks.mod<-glmer(value~(weeks|id)+treat+weeks*treat, 
                 offset=logT, family=poisson(link="log"), 
                 data=m.epilepsy)
summary(weeks.mod)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: value ~ (weeks | id) + treat + weeks * treat
##    Data: m.epilepsy
##  Offset: logT
## 
##      AIC      BIC   logLik deviance df.resid 
##     1924     1950     -955     1910      288 
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -3.379 -0.723 -0.117  0.585  6.631 
## 
## Random effects:
##  Groups Name        Variance Std.Dev. Corr
##  id     (Intercept) 0.52686  0.7259       
##         weeks       0.00503  0.0709   0.22
## Number of obs: 295, groups:  id, 59
## 
## Fixed effects:
##             Estimate Std. Error z value           Pr(>|z|)    
## (Intercept)   1.1039     0.1426    7.74 0.0000000000000098 ***
## treat         0.0175     0.1963    0.09              0.929    
## weeks        -0.0113     0.0168   -0.67              0.500    
## treat:weeks  -0.0467     0.0234   -2.00              0.046 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) treat  weeks 
## treat       -0.724              
## weeks        0.065 -0.053       
## treat:weeks -0.054  0.074 -0.694
#Plot expected counts for treated and untreated IF random effects are zero!
week.index<-c(0:8)
e.treated<-exp(1.10397+0.1748-(.01134+.04672)*week.index)
e.untreated<-exp(1.10397-(.01134)*week.index)
plot(y=e.treated,x=week.index,type='l',ylim=c(2,5))
lines(y=e.untreated,x=week.index,lty=2)

###SECOND COUNT EXAMPLE: REVISITING LEPROSY DATA###
#clean up
rm(list=ls())

#data
#leprosy<-read.table(file.choose(), header=TRUE, sep="")
leprosy<-read.table("leprosy.txt", header=TRUE, sep="")

#create id variable
leprosy$id<-c(1:nrow(leprosy))

#relevel treatment so that Placebo is the reference
leprosy$drug<-relevel(leprosy$drug,"C")

#create binary variable for whether an antibiotic was administered
leprosy$antibiotic<-1-as.numeric(leprosy$drug=="C")

#reshape data
m.leprosy<-melt.data.frame(data=leprosy, 
                           measure.vars=c("pre","post"), 
                           id=c("id","drug","antibiotic"))

#create time variable
m.leprosy$time<-as.numeric(m.leprosy$variable=="post")

#create inputs
m.leprosy$a<-as.numeric(m.leprosy$time==1 & m.leprosy$drug=="A")
m.leprosy$b<-as.numeric(m.leprosy$time==1 & m.leprosy$drug=="B")
m.leprosy$treat<-as.numeric(m.leprosy$time==1 & m.leprosy$antibiotic==1)

#sort the data
m.leprosy<-with(m.leprosy,m.leprosy[order(id,time),])

#Three Treatment Model
mod.3<-glmer(value~time+a+b+(1|id), 
             family=poisson(link="log"), 
             data=m.leprosy)
summary(mod.3)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: value ~ time + a + b + (1 | id)
##    Data: m.leprosy
## 
##      AIC      BIC   logLik deviance df.resid 
##      362      373     -176      352       55 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.8351 -0.6176  0.0249  0.5554  1.9403 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  id     (Intercept) 0.28     0.529   
## Number of obs: 60, groups:  id, 30
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  2.24153    0.11411   19.64   <2e-16 ***
## time         0.00331    0.12260    0.03   0.9785    
## a           -0.60587    0.20179   -3.00   0.0027 ** 
## b           -0.52311    0.19464   -2.69   0.0072 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##      (Intr) time   a     
## time -0.187              
## a    -0.035 -0.583       
## b    -0.027 -0.604  0.373
#Likelihood ratio test for whether treated patients 
#differed from placebo patients overall
mod.3.alt<-update(mod.3,.~.-a-b); summary(mod.3.alt)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: value ~ time + (1 | id)
##    Data: m.leprosy
## 
##      AIC      BIC   logLik deviance df.resid 
##      370      376     -182      364       57 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.0118 -0.6351  0.0725  0.4371  2.0274 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  id     (Intercept) 0.332    0.577   
## Number of obs: 60, groups:  id, 30
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   2.2186     0.1218   18.21   <2e-16 ***
## time         -0.3065     0.0848   -3.61   0.0003 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##      (Intr)
## time -0.295
anova(mod.3,mod.3.alt)
## Data: m.leprosy
## Models:
## mod.3.alt: value ~ time + (1 | id)
## mod.3: value ~ time + a + b + (1 | id)
##           Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)   
## mod.3.alt  3 370 376   -182      364                           
## mod.3      5 362 373   -176      352  11.6      2      0.003 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
###########################################################

0.2.9 Missing Data in Panels and Dropout

#clean up
rm(list=ls())

#load libraries
library(lme4)
library(reshape)
library(mice)

#load data
amenorrhea<-read.table("amenorrhea.txt", header=TRUE, sep="")

#turn status into a numeric variable
amenorrhea$status[amenorrhea$status=="."]<-NA
amenorrhea$status<-as.numeric(amenorrhea$status==1)

#start time at 0 (necessary for convergence)
amenorrhea$time<-amenorrhea$time-1

#Create Experimental Time Terms
amenorrhea$dt<-amenorrhea$time*amenorrhea$dose
amenorrhea$dt2<-I(amenorrhea$time^2)*amenorrhea$dose

#Book Model from Chapter 14 (differ slightly with time rescale, but they converge
book.amenorrhea<-glmer(status~(1|id)+time+I(time^2)+dt+dt2, 
                       family=binomial(link="logit"), 
                       data=amenorrhea)
summary(book.amenorrhea)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: status ~ (1 | id) + time + I(time^2) + dt + dt2
##    Data: amenorrhea
## 
##      AIC      BIC   logLik deviance df.resid 
##     3925     3962    -1957     3913     3610 
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -2.775 -0.469 -0.233  0.468  4.296 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  id     (Intercept) 4.35     2.09    
## Number of obs: 3616, groups:  id, 1151
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -2.4604     0.1397  -17.61  < 2e-16 ***
## time          0.7561     0.1984    3.81  0.00014 ***
## I(time^2)     0.0340     0.0655    0.52  0.60385    
## dt            0.8861     0.2513    3.53  0.00042 ***
## dt2          -0.2579     0.0879   -2.93  0.00335 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##           (Intr) time   I(t^2) dt    
## time      -0.417                     
## I(time^2)  0.238 -0.945              
## dt        -0.073 -0.618  0.646       
## dt2        0.057  0.598 -0.678 -0.956
#reshape long to wide
wide<-reshape(amenorrhea, idvar=c("id", "dose"), 
              timevar="time", direction="wide")

#multiple random imputation using predictive mean matching like the book
m<-5 #number of imputations, definitely worth increasing!
w.imp<-mice(wide, m=m, defaultMethod="pmm")
## 
##  iter imp variable
##   1   1  status.1  status.2  status.3
##   1   2  status.1  status.2  status.3
##   1   3  status.1  status.2  status.3
##   1   4  status.1  status.2  status.3
##   1   5  status.1  status.2  status.3
##   2   1  status.1  status.2  status.3
##   2   2  status.1  status.2  status.3
##   2   3  status.1  status.2  status.3
##   2   4  status.1  status.2  status.3
##   2   5  status.1  status.2  status.3
##   3   1  status.1  status.2  status.3
##   3   2  status.1  status.2  status.3
##   3   3  status.1  status.2  status.3
##   3   4  status.1  status.2  status.3
##   3   5  status.1  status.2  status.3
##   4   1  status.1  status.2  status.3
##   4   2  status.1  status.2  status.3
##   4   3  status.1  status.2  status.3
##   4   4  status.1  status.2  status.3
##   4   5  status.1  status.2  status.3
##   5   1  status.1  status.2  status.3
##   5   2  status.1  status.2  status.3
##   5   3  status.1  status.2  status.3
##   5   4  status.1  status.2  status.3
##   5   5  status.1  status.2  status.3
## Warning: Number of logged events: 8
#reshape imputed data sets to long form
long.data<-list(NA,m)
for(i in 1:m)
    {
        long.data[[i]]<-melt.data.frame(data=complete(w.imp,i),
                                measure.vars=c("status.0","status.1",
                                               "status.2","status.3"), 
                                id=c("id","dose"))
        long.data[[i]]$time<-as.numeric(substr(long.data[[i]]$variable,8,8))
        long.data[[i]]$dt<-long.data[[i]]$time*long.data[[i]]$dose
        long.data[[i]]$dt2<-I(long.data[[i]]$time^2)*long.data[[i]]$dose
    }

#Now: What does our model look like with imputed data?
#Note: "status" is now "value"
imputed.models<-list(NA,m)
for(i in 1:m)
    {
        imputed.models[[i]]<-glmer(value~(1|id)+time+I(time^2)+dt+dt2, 
                                   family=binomial(link="logit"), 
                                   data=long.data[[i]])
        summary(imputed.models[[i]])    
    }
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.00118607
## (tol = 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.00121979
## (tol = 0.001, component 1)
#Average coefficients
coeffs<-NULL
for(i in 1:m)
    {
        coeffs<-rbind(coeffs,fixef(imputed.models[[i]]))
        }
avg.coef<-apply(coeffs,2,mean)

#Between variance of coefficient estimates
between<-apply(coeffs,2,var)

#Within variance of coefficient estimates
errVars<-NULL
for(i in 1:m)
    {
        errVars<-rbind(errVars,diag(vcov(imputed.models[[i]])))
        }
within<-apply(errVars,2,mean)

#Obtain Standard Errors of Averaged Fixed Effects
final.se <- sqrt(within + ((m+1)/m)*between)

#t- or z-ratios
test.stats<-avg.coef/final.se

#degrees of freedom
deg.free <- (m-1)*(1+(1/(m+1))*within/between)^2

#p-values for z-test
p.values.z<-2*(1-pnorm(abs(test.stats)))

#p-values for t-test
p.values.t<-2*(1-pt(abs(test.stats), df=deg.free))

#All results from imputation
avg.coef
## (Intercept)        time   I(time^2)          dt         dt2 
##     -2.0731      0.3822     -0.0238      0.9690     -0.3163
final.se
## (Intercept)        time   I(time^2)          dt         dt2 
##      0.1685      0.2214      0.0726      0.4436      0.1499
test.stats
## (Intercept)        time   I(time^2)          dt         dt2 
##     -12.303       1.727      -0.327       2.184      -2.110
p.values.t
## (Intercept)        time   I(time^2)          dt         dt2 
##   0.0000447   0.1324795   0.7542532   0.0878135   0.0952300
#make a LaTeX table
library(xtable)
xtable(cbind(avg.coef,final.se,test.stats,p.values.t),digits=4)
## % latex table generated in R 3.5.1 by xtable 1.8-3 package
## % Fri Jan  4 21:10:03 2019
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrrrr}
##   \hline
##  & avg.coef & final.se & test.stats & p.values.t \\ 
##   \hline
## (Intercept) & -2.0731 & 0.1685 & -12.3030 & 0.0000 \\ 
##   time & 0.3822 & 0.2214 & 1.7267 & 0.1325 \\ 
##   I(time\verb|^|2) & -0.0238 & 0.0726 & -0.3270 & 0.7543 \\ 
##   dt & 0.9690 & 0.4436 & 2.1842 & 0.0878 \\ 
##   dt2 & -0.3163 & 0.1499 & -2.1104 & 0.0952 \\ 
##    \hline
## \end{tabular}
## \end{table}
#Compare to ignoring missing
summary(book.amenorrhea)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: status ~ (1 | id) + time + I(time^2) + dt + dt2
##    Data: amenorrhea
## 
##      AIC      BIC   logLik deviance df.resid 
##     3925     3962    -1957     3913     3610 
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -2.775 -0.469 -0.233  0.468  4.296 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  id     (Intercept) 4.35     2.09    
## Number of obs: 3616, groups:  id, 1151
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -2.4604     0.1397  -17.61  < 2e-16 ***
## time          0.7561     0.1984    3.81  0.00014 ***
## I(time^2)     0.0340     0.0655    0.52  0.60385    
## dt            0.8861     0.2513    3.53  0.00042 ***
## dt2          -0.2579     0.0879   -2.93  0.00335 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##           (Intr) time   I(t^2) dt    
## time      -0.417                     
## I(time^2)  0.238 -0.945              
## dt        -0.073 -0.618  0.646       
## dt2        0.057  0.598 -0.678 -0.956
###########################################################

0.2.10 Event History Models: Parameteric Models and Cox Proportional Hazards Models

#clean up
rm(list=ls())

#packages
library(foreign)    #Stata data
library(survival)   #Sufficient for Cox model
## 
## Attaching package: 'survival'
## The following objects are masked from 'package:faraway':
## 
##     rats, solder
library(eha)        #Parametric models and another version of Cox
## 
## Attaching package: 'eha'
## The following objects are masked from 'package:VGAM':
## 
##     dgompertz, dmakeham, pgompertz, pmakeham, qgompertz, qmakeham,
##     rgompertz, rmakeham
#load cabinet duration data from Brad Jones's website
#http://psfaculty.ucdavis.edu/bsjjones/cabinet.dta
cab<-read.dta("cabinet.dta")

###WEIBULL MODEL###
#Weibull Regression: Proportional hazards model with baseline hazard(s) 
#from the Weibull family of distributions. Allows for stratification 
#with different scale and shape in each stratum, and left truncated 
#and right censored data.

#results are consistent with p.61, except for the constant
weib.cabinet<-eha::weibreg(Surv(time=durat, event=censor)~
                        invest+polar+numst+format+postelec+caretakr, 
                      data=cab)
summary(weib.cabinet)
## Call:
## eha::weibreg(formula = Surv(time = durat, event = censor) ~ invest + 
##     polar + numst + format + postelec + caretakr, data = cab)
## 
## Covariate           Mean       Coef Exp(Coef)  se(Coef)    Wald p
## invest              0.332     0.383     1.466     0.137     0.005 
## polar              10.521     0.023     1.023     0.006     0.000 
## numst               0.713    -0.601     0.548     0.131     0.000 
## format              1.690     0.132     1.142     0.044     0.002 
## postelec            0.665    -0.879     0.415     0.138     0.000 
## caretakr            0.009     1.726     5.618     0.276     0.000 
## 
## log(scale)                    2.985    19.795     0.128     0.000 
## log(shape)                    0.258     1.294     0.050     0.000 
## 
## Events                    271 
## Total time at risk        5789.5 
## Max. log. likelihood      -1014.6 
## LR test statistic         172 
## Degrees of freedom        6 
## Overall p-value           0
###LOG-LOGISTIC MODEL###
#Parametric Proportional Hazards Regression
#Proportional hazards model with parametric baseline hazard(s). 
#Allows for stratification with different scale and shape 
#in each stratum, and left truncated and right censored data.
log.logis.cabinet<-eha::phreg(Surv(time=durat, event=censor)~
                           invest+polar+numst+format+postelec+caretakr, 
                         data=cab, dist="loglogistic")
summary(log.logis.cabinet)
## Call:
## eha::phreg(formula = Surv(time = durat, event = censor) ~ invest + 
##     polar + numst + format + postelec + caretakr, data = cab, 
##     dist = "loglogistic")
## 
## Covariate          W.mean      Coef Exp(Coef)  se(Coef)    Wald p
## (Intercept)                   2.939               2.816     0.297 
## invest              0.332     0.382     1.466     0.137     0.005 
## polar              10.521     0.023     1.023     0.006     0.000 
## numst               0.713    -0.595     0.552     0.132     0.000 
## format              1.690     0.132     1.141     0.044     0.003 
## postelec            0.665    -0.869     0.419     0.140     0.000 
## caretakr            0.009     1.741     5.704     0.279     0.000 
## 
## log(scale)                    5.187               2.323     0.026 
## log(shape)                    0.278               0.074     0.000 
## 
## Events                    271 
## Total time at risk        5789.5 
## Max. log. likelihood      -1014.6 
## LR test statistic         165.97 
## Degrees of freedom        6 
## Overall p-value           0
###COX PROPORTIONAL HAZARDS MODEL###
#Cox proportional hazards regression model. 
#Time dependent variables, time dependent strata, 
#multiple events per subject, and other extensions 
#are incorporated using the counting process formulation 
#of Andersen and Gill.

#Efron is Default, see p. 60
#Note the two possible interpretations
cox.cabinet<-survival::coxph(Surv(durat, censor)~
                     invest+polar+numst+format+postelec+caretakr, 
                   data=cab)
summary(cox.cabinet)
## Call:
## survival::coxph(formula = Surv(durat, censor) ~ invest + polar + 
##     numst + format + postelec + caretakr, data = cab)
## 
##   n= 314, number of events= 271 
## 
##              coef exp(coef) se(coef)     z      Pr(>|z|)    
## invest    0.38714   1.47276  0.13713  2.82        0.0048 ** 
## polar     0.02334   1.02361  0.00562  4.15 0.00003275722 ***
## numst    -0.58262   0.55843  0.13223 -4.41 0.00001051805 ***
## format    0.13001   1.13884  0.04387  2.96        0.0030 ** 
## postelec -0.86112   0.42269  0.14062 -6.12 0.00000000091 ***
## caretakr  1.71040   5.53116  0.28282  6.05 0.00000000147 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##          exp(coef) exp(-coef) lower .95 upper .95
## invest       1.473      0.679     1.126     1.927
## polar        1.024      0.977     1.012     1.035
## numst        0.558      1.791     0.431     0.724
## format       1.139      0.878     1.045     1.241
## postelec     0.423      2.366     0.321     0.557
## caretakr     5.531      0.181     3.177     9.628
## 
## Concordance= 0.725  (se = 0.016 )
## Rsquare= 0.407   (max possible= 1 )
## Likelihood ratio test= 164  on 6 df,   p=<2e-16
## Wald test            = 176  on 6 df,   p=<2e-16
## Score (logrank) test = 216  on 6 df,   p=<2e-16
#Breslow, see p. 60
#Note the two possible interpretations
cox.cabinet.2<-coxph(Surv(durat, censor)~
                       invest+polar+numst+format+postelec+caretakr, 
                     data=cab, method="breslow")
summary(cox.cabinet.2)
## Call:
## coxph(formula = Surv(durat, censor) ~ invest + polar + numst + 
##     format + postelec + caretakr, data = cab, method = "breslow")
## 
##   n= 314, number of events= 271 
## 
##              coef exp(coef) se(coef)     z    Pr(>|z|)    
## invest    0.37841   1.45996  0.13742  2.75      0.0059 ** 
## polar     0.02245   1.02270  0.00562  3.99 0.000065612 ***
## numst    -0.56949   0.56581  0.13207 -4.31 0.000016166 ***
## format    0.12540   1.13360  0.04396  2.85      0.0043 ** 
## postelec -0.83272   0.43487  0.14044 -5.93 0.000000003 ***
## caretakr  1.54278   4.67757  0.28011  5.51 0.000000036 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##          exp(coef) exp(-coef) lower .95 upper .95
## invest       1.460      0.685     1.115     1.911
## polar        1.023      0.978     1.011     1.034
## numst        0.566      1.767     0.437     0.733
## format       1.134      0.882     1.040     1.236
## postelec     0.435      2.300     0.330     0.573
## caretakr     4.678      0.214     2.701     8.099
## 
## Concordance= 0.725  (se = 0.016 )
## Rsquare= 0.384   (max possible= 1 )
## Likelihood ratio test= 152  on 6 df,   p=<2e-16
## Wald test            = 162  on 6 df,   p=<2e-16
## Score (logrank) test = 192  on 6 df,   p=<2e-16
#Alternative estimator in "eha"
cox.cabinet.3<-eha::coxreg(Surv(durat, censor)~
                        invest+polar+numst+format+postelec+caretakr, 
                      data=cab, method="breslow")
summary(cox.cabinet.3)
## Call:
## eha::coxreg(formula = Surv(durat, censor) ~ invest + polar + 
##     numst + format + postelec + caretakr, data = cab, method = "breslow")
## 
## Covariate           Mean       Coef     Rel.Risk   S.E.    Wald p
## invest              0.332     0.378     1.460     0.137     0.006 
## polar              10.521     0.022     1.023     0.006     0.000 
## numst               0.713    -0.569     0.566     0.132     0.000 
## format              1.690     0.125     1.134     0.044     0.004 
## postelec            0.665    -0.833     0.435     0.140     0.000 
## caretakr            0.009     1.543     4.678     0.280     0.000 
## 
## Events                    271 
## Total time at risk        5789.5 
## Max. log. likelihood      -1299.9 
## LR test statistic         152.37 
## Degrees of freedom        6 
## Overall p-value           0
#HW: load UN peacekeeping data from Brad Jones's website
#Run two event history models: one parametric and one Cox
#un<-read.dta("//spia.uga.edu/faculty_pages/monogan/teaching/pd/UNFINAL.dta")

###########################################################


###ADDITIONAL TOPICS FOR FINAL EVENT HISTORY CLASS###

###RESIDUAL ANALYSES###
#"survival" package works a bit better here.
#parametric survival regression model. 
#These are location-scale models for an arbitrary transform 
#of the time variable; the most common cases use a log 
#transformation, leading to accelerated failure time models.

#Weibull Model, accelerated failure time form. Residuals in time scale for "response".
weib.cabinet.b<-survival::survreg(Surv(time=durat, event=censor)~
                          invest+polar+numst+format+postelec+caretakr, 
                        data=cab, dist='weibull')
summary(weib.cabinet.b)
## 
## Call:
## survival::survreg(formula = Surv(time = durat, event = censor) ~ 
##     invest + polar + numst + format + postelec + caretakr, data = cab, 
##     dist = "weibull")
##                Value Std. Error     z              p
## (Intercept)  2.98543    0.12811 23.30        < 2e-16
## invest      -0.29582    0.10590 -2.79         0.0052
## polar       -0.01794    0.00428 -4.19 0.000027425315
## numst        0.46489    0.10058  4.62 0.000003800260
## format      -0.10237    0.03359 -3.05         0.0023
## postelec     0.67961    0.10438  6.51 0.000000000075
## caretakr    -1.33401    0.20175 -6.61 0.000000000038
## Log(scale)  -0.25762    0.05006 -5.15 0.000000265343
## 
## Scale= 0.773 
## 
## Weibull distribution
## Loglik(model)= -1015   Loglik(intercept only)= -1101
##  Chisq= 172 on 6 degrees of freedom, p= 1.7e-34 
## Number of Newton-Raphson Iterations: 5 
## n= 314
t.resid<-residuals(weib.cabinet.b,type="response")
plot(y=t.resid,x=cab$polar)
lines(lowess(x=cab$polar,y=t.resid),col='red')

#Log-Logistic Model, accelerated failure time form. Residuals in time scale for "response".
log.logis.cabinet.b<-survreg(Surv(time=durat, event=censor)~
                               invest+polar+numst+format+postelec+caretakr, 
                             data=cab, dist='loglogistic')
summary(log.logis.cabinet.b)
## 
## Call:
## survreg(formula = Surv(time = durat, event = censor) ~ invest + 
##     polar + numst + format + postelec + caretakr, data = cab, 
##     dist = "loglogistic")
##                Value Std. Error      z           p
## (Intercept)  2.72882    0.15959  17.10     < 2e-16
## invest      -0.33675    0.12781  -2.63      0.0084
## polar       -0.02220    0.00526  -4.22 0.000024793
## numst        0.48307    0.12125   3.98 0.000067745
## format      -0.10935    0.04197  -2.61      0.0092
## postelec     0.64088    0.12403   5.17 0.000000238
## caretakr    -1.26921    0.23103  -5.49 0.000000039
## Log(scale)  -0.56577    0.05114 -11.06     < 2e-16
## 
## Scale= 0.568 
## 
## Log logistic distribution
## Loglik(model)= -1025   Loglik(intercept only)= -1099
##  Chisq= 149 on 6 degrees of freedom, p= 1.4e-29 
## Number of Newton-Raphson Iterations: 4 
## n= 314
t2.resid<-residuals(log.logis.cabinet.b,type="response")
plot(y=t2.resid,x=cab$polar)
lines(lowess(x=cab$polar,y=t2.resid),col='red')

#Cox Model
m.resid<-residuals(cox.cabinet,type="martingale")
plot(y=m.resid,x=cab$polar)
lines(lowess(x=cab$polar,y=m.resid),col='red')

###FRAILTY MODELS###
#"survival" package works a bit better here.

#create an ID variable
cab$ID<-row.names(cab)

#Log-Logistic Model, accelerated failure time form. Residuals in time scale for "response".
log.logis.cabinet.b.frail<-survreg(Surv(time=durat, event=censor)~
                      invest+polar+numst+format+postelec+caretakr+
                        frailty.gaussian(ID,method="aic"), 
                      data=cab, dist='loglogistic')
#summary(log.logis.cabinet.b.frail)

#Cox Model
cox.cabinet.2.frail<-coxph(Surv(durat, censor)~
                      invest+polar+numst+format+postelec+caretakr+
                        frailty.gamma(ID,method="em"), 
                      data=cab, method="breslow")
summary(cox.cabinet.2.frail)
## Call:
## coxph(formula = Surv(durat, censor) ~ invest + polar + numst + 
##     format + postelec + caretakr + frailty.gamma(ID, method = "em"), 
##     data = cab, method = "breslow")
## 
##   n= 314, number of events= 271 
## 
##                           coef    se(coef) se2     Chisq DF p          
## invest                     0.3784 0.13742  0.13742  7.58 1  0.005900000
## polar                      0.0224 0.00562  0.00562 15.93 1  0.000066000
## numst                     -0.5695 0.13207  0.13207 18.59 1  0.000016000
## format                     0.1254 0.04396  0.04396  8.14 1  0.004300000
## postelec                  -0.8327 0.14044  0.14044 35.16 1  0.000000003
## caretakr                   1.5428 0.28011  0.28011 30.34 1  0.000000036
## frailty.gamma(ID, method                            0.00 0  0.900000000
## 
##          exp(coef) exp(-coef) lower .95 upper .95
## invest       1.460      0.685     1.115     1.911
## polar        1.023      0.978     1.011     1.034
## numst        0.566      1.767     0.437     0.733
## format       1.134      0.882     1.040     1.236
## postelec     0.435      2.300     0.330     0.573
## caretakr     4.678      0.214     2.701     8.099
## 
## Iterations: 6 outer, 34 Newton-Raphson
##      Variance of random effect= 0.0000005   I-likelihood = -1299.9 
## Degrees of freedom for terms= 1 1 1 1 1 1 0 
## Concordance= 0.727  (se = 0.727 )
## Likelihood ratio test= 152  on 6 df,   p=<2e-16
###########################################################

0.2.11 Event History Models: Models for Discrete Time and Time-Varying Covariates

#Discrete-Time Model and Conditional Logit Code
#clean up
rm(list=ls())

#load libraries
library(foreign)
library(lme4)
library(survival)
library(mgcv)
## This is mgcv 1.8-26. For overview type 'help("mgcv-package")'.
## 
## Attaching package: 'mgcv'
## The following object is masked from 'package:VGAM':
## 
##     s
#Load Data
#cong<-read.dta('http://psfaculty.ucdavis.edu/bsjjones/career.dta')
cong<-read.dta('career.dta')

#Change Stata Names
cong$d<-cong[,34]
cong$d.2<-abs(1-cong$d)
cong$t<-cong[,35]

###SPECIFYING THE BASELINE HAZARD RATE IN A LOGIT MODEL###
###NOTE: STANDARD ERRORS ARE NOT CLUSTER-CORRECTED, AS IN THE BOOK###
###ALTERNATIVE: FRAILTY TERM BY MEMBER###
#Exponential
exp.cong<-glm(d~rep, family=binomial(link="logit"), data=cong)
summary(exp.cong)
## 
## Call:
## glm(formula = d ~ rep, family = binomial(link = "logit"), data = cong)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.417  -0.417  -0.381  -0.381   2.306  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -2.5863     0.0721  -35.88   <2e-16 ***
## rep           0.1894     0.1069    1.77    0.076 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2707.4  on 5053  degrees of freedom
## Residual deviance: 2704.3  on 5052  degrees of freedom
##   (476 observations deleted due to missingness)
## AIC: 2708
## 
## Number of Fisher Scoring iterations: 5
exp.cong.2<-lme4::glmer(d~rep+(1|memberid), 
                  family=binomial(link="logit"), 
                  data=cong)
summary(exp.cong.2)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: d ~ rep + (1 | memberid)
##    Data: cong
## 
##      AIC      BIC   logLik deviance df.resid 
##     2660     2679    -1327     2654     5051 
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -0.682 -0.226 -0.192 -0.178  3.991 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  memberid (Intercept) 1.85     1.36    
## Number of obs: 5054, groups:  memberid, 944
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -2.870      0.117  -24.46   <2e-16 ***
## rep            0.164      0.154    1.07     0.29    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##     (Intr)
## rep -0.581
#Linear function of time
linear.cong<-glm(d~rep+t, 
                 family=binomial(link="logit"), data=cong)
summary(linear.cong)
## 
## Call:
## glm(formula = d ~ rep + t, family = binomial(link = "logit"), 
##     data = cong)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.464  -0.430  -0.400  -0.347   2.645  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -2.2574     0.1036  -21.79  < 2e-16 ***
## rep           0.1585     0.1073    1.48     0.14    
## t            -0.0756     0.0185   -4.09 0.000043 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2707.4  on 5053  degrees of freedom
## Residual deviance: 2686.0  on 5051  degrees of freedom
##   (476 observations deleted due to missingness)
## AIC: 2692
## 
## Number of Fisher Scoring iterations: 5
linear.cong.2<-glmer(d~rep+t+(1|memberid), 
                     family=binomial(link="logit"), 
                     data=cong)
summary(linear.cong.2)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: d ~ rep + t + (1 | memberid)
##    Data: cong
## 
##      AIC      BIC   logLik deviance df.resid 
##     2658     2684    -1325     2650     5050 
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -0.763 -0.199 -0.180 -0.159  3.767 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  memberid (Intercept) 2.79     1.67    
## Number of obs: 5054, groups:  memberid, 944
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -3.2523     0.2675  -12.16   <2e-16 ***
## rep           0.1835     0.1732    1.06    0.289    
## t             0.0638     0.0366    1.74    0.082 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##     (Intr) rep   
## rep -0.344       
## t   -0.868  0.067
#Quadratic function of time
quad.cong<-glm(d~rep+duration+I(duration^2), 
               family=binomial(link="logit"), 
               data=cong)
summary(quad.cong)
## 
## Call:
## glm(formula = d ~ rep + duration + I(duration^2), family = binomial(link = "logit"), 
##     data = cong)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.616  -0.421  -0.373  -0.337   2.472  
## 
## Coefficients:
##               Estimate Std. Error z value  Pr(>|z|)    
## (Intercept)   -1.98500    0.12964  -15.31   < 2e-16 ***
## rep            0.16672    0.10754    1.55   0.12108    
## duration      -0.22354    0.04771   -4.68 0.0000028 ***
## I(duration^2)  0.01222    0.00349    3.50   0.00046 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2707.4  on 5053  degrees of freedom
## Residual deviance: 2675.5  on 5050  degrees of freedom
##   (476 observations deleted due to missingness)
## AIC: 2684
## 
## Number of Fisher Scoring iterations: 5
quad.cong.2<-glmer(d~rep+duration+I(duration^2)+(1|memberid), 
                   family=binomial(link="logit"), 
                   data=cong)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.00319156
## (tol = 0.001, component 1)
summary(quad.cong.2)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: d ~ rep + duration + I(duration^2) + (1 | memberid)
##    Data: cong
## 
##      AIC      BIC   logLik deviance df.resid 
##     2657     2690    -1324     2647     5049 
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -0.738 -0.216 -0.185 -0.168  3.857 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  memberid (Intercept) 2.31     1.52    
## Number of obs: 5054, groups:  memberid, 944
## 
## Fixed effects:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -2.94441    0.28691  -10.26   <2e-16 ***
## rep            0.18354    0.16395    1.12    0.263    
## duration      -0.04940    0.07185   -0.69    0.492    
## I(duration^2)  0.00785    0.00453    1.73    0.083 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) rep    duratn
## rep         -0.296              
## duration    -0.823  0.025       
## I(duratn^2)  0.568  0.006 -0.891
## convergence code: 0
## Model failed to converge with max|grad| = 0.00319156 (tol = 0.001, component 1)
#Log of duration
log.cong<-glm(d~rep+log(duration), 
              family=binomial(link="logit"), 
              data=cong)
summary(log.cong)
## 
## Call:
## glm(formula = d ~ rep + log(duration), family = binomial(link = "logit"), 
##     data = cong)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.508  -0.415  -0.377  -0.338   2.552  
## 
## Coefficients:
##               Estimate Std. Error z value     Pr(>|z|)    
## (Intercept)    -2.1365     0.1014  -21.06      < 2e-16 ***
## rep             0.1560     0.1074    1.45         0.15    
## log(duration)  -0.3896     0.0679   -5.74 0.0000000096 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2707.4  on 5053  degrees of freedom
## Residual deviance: 2671.1  on 5051  degrees of freedom
##   (476 observations deleted due to missingness)
## AIC: 2677
## 
## Number of Fisher Scoring iterations: 5
log.cong.2<-glmer(d~rep+log(duration)+(1|memberid), 
                  family=binomial(link="logit"),
                  data=cong)
summary(log.cong.2)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: d ~ rep + log(duration) + (1 | memberid)
##    Data: cong
## 
##      AIC      BIC   logLik deviance df.resid 
##     2662     2688    -1327     2654     5050 
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -0.674 -0.229 -0.195 -0.179  4.062 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  memberid (Intercept) 1.75     1.32    
## Number of obs: 5054, groups:  memberid, 944
## 
## Fixed effects:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -2.8219     0.2228  -12.67   <2e-16 ***
## rep             0.1623     0.1519    1.07     0.29    
## log(duration)  -0.0321     0.1221   -0.26     0.79    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) rep   
## rep         -0.339       
## log(duratn) -0.853  0.044

Fits a generalized additive model (GAM) to data, the term ‘GAM’ being taken to include any quadratically penalized GLM and a variety of other models estimated by a quadratically penalised likelihood type approach (see family.mgcv). The degree of smoothness of model terms is estimated as part of fitting. gam can also fit any GLM subject to multiple quadratic penalties (including estimation of degree of penalization). Confidence/credible intervals are readily available for any quantity predicted using a fitted model.

Smooth terms are represented using penalized regression splines (or similar smoothers) with smoothing parameters selected by GCV/UBRE/AIC/REML or by regression splines with fixed degrees of freedom (mixtures of the two are permitted). Multi-dimensional smooths are available using penalized thin plate regression splines (isotropic) or tensor product splines (when an isotropic smooth is inappropriate), and users can add smooths. Linear functionals of smooths can also be included in models. For an overview of the smooths available see smooth.terms. For more on specifying models see gam.models, random.effects and linear.functional.terms. For more on model selection see gam.selection. Do read gam.check and choose.k.

#Smoothed spline
#Generalized Additive Models With Integrated Smoothness Estimation
spline.cong<-mgcv::gam(d~rep+s(duration), 
                 family=binomial(link="logit"), 
                 data=cong)
summary(spline.cong)
## 
## Family: binomial 
## Link function: logit 
## 
## Formula:
## d ~ rep + s(duration)
## 
## Parametric coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -2.6240     0.0736  -35.66   <2e-16 ***
## rep           0.1674     0.1076    1.56     0.12    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##              edf Ref.df Chi.sq     p-value    
## s(duration) 4.66   5.58   44.4 0.000000062 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.00938   Deviance explained =  1.8%
## UBRE = -0.4713  Scale est. = 1         n = 5054
plot(spline.cong)

spline.cong.2<-gam(d~rep+s(duration)+s(memberid,bs='re'), 
                   family=binomial(link="logit"), 
                   data=cong)
summary(spline.cong.2)
## 
## Family: binomial 
## Link function: logit 
## 
## Formula:
## d ~ rep + s(duration) + s(memberid, bs = "re")
## 
## Parametric coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -2.436      0.130  -18.75   <2e-16 ***
## rep            0.153      0.108    1.42     0.16    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##               edf Ref.df Chi.sq     p-value    
## s(duration) 4.649   5.57  44.90 0.000000049 ***
## s(memberid) 0.747   1.00   3.01       0.045 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.0103   Deviance explained = 1.93%
## UBRE = -0.47174  Scale est. = 1         n = 5054
plot(spline.cong.2)

###Cox/Conditional Logit Model###
#Estimates a logistic regression model by maximising the conditional likelihood. 
conditional.cong<-survival::clogit(d~rep+strata(t),
                         method="approximate",
                         data=cong)
summary(conditional.cong)
## Call:
## coxph(formula = Surv(rep(1, 5530L), d) ~ rep + strata(t), data = cong, 
##     method = "breslow")
## 
##   n= 5054, number of events= 382 
##    (476 observations deleted due to missingness)
## 
##      coef exp(coef) se(coef)    z Pr(>|z|)
## rep 0.153     1.165    0.103 1.48     0.14
## 
##     exp(coef) exp(-coef) lower .95 upper .95
## rep      1.17      0.858     0.952      1.43
## 
## Concordance= 0.517  (se = 0.015 )
## Rsquare= 0   (max possible= 0.615 )
## Likelihood ratio test= 2.19  on 1 df,   p=0.1
## Wald test            = 2.2  on 1 df,   p=0.1
## Score (logrank) test = 2.21  on 1 df,   p=0.1
###########################################################

0.2.12 Repeated Measures and Multiple Source

#clean up
rm(list=ls())

#packages
library(lme4)
library(nlme)
library(lattice)
library(geepack)
library(reshape)

###REPEATED MEASURES###
headache<-read.table("headache.txt", header=TRUE, sep="")
head(headache)
##   id center treatment.a sequence period treatment.b response
## 1  1      1           B        1      0           B     0.00
## 2  1      1           A        1      1           A    11.50
## 3  2      1           A        2      0           A    11.75
## 4  2      1           B        2      1           B    13.75
## 5  3      1           A        2      0           A     0.50
## 6  3      1           B        2      1           B     8.25
table(headache$treatment.a[headache$sequence==1],
      headache$period[headache$sequence==1])
##    
##       0   1
##   A   0 127
##   B 127   0
##   P   0   0
table(headache$treatment.a[headache$sequence==2],
      headache$period[headache$sequence==2])
##    
##       0   1
##   A 126   0
##   B   0 126
##   P   0   0
table(headache$treatment.a[headache$sequence==3],
      headache$period[headache$sequence==3])
##    
##      0  1
##   A  0  0
##   B  0 42
##   P 42  0
table(headache$treatment.a[headache$sequence==4],
      headache$period[headache$sequence==4])
##    
##      0  1
##   A  0  0
##   B 42  0
##   P  0 42
table(headache$treatment.a[headache$sequence==5],
      headache$period[headache$sequence==5])
##    
##      0  1
##   A  0 43
##   B  0  0
##   P 43  0
table(headache$treatment.a[headache$sequence==6],
      headache$period[headache$sequence==6])
##    
##      0  1
##   A 43  0
##   B  0  0
##   P  0 43
#relevel treatment
headache$treatment.a<-relevel(headache$treatment.a, "P")

#carryover effects
headache$carry.a<-as.numeric(headache$sequence%in%c(2,6) & headache$period==1)
headache$carry.b<-as.numeric(headache$sequence%in%c(1,4) & headache$period==1)

#MODEL WITH CARRYOVER EFFECTS
headache.mod <- lmer(response~as.factor(treatment.a)+
                       period+carry.a+carry.b+(1|id), 
                     data=headache)
summary(headache.mod)
## Linear mixed model fit by REML ['lmerMod']
## Formula: response ~ as.factor(treatment.a) + period + carry.a + carry.b +  
##     (1 | id)
##    Data: headache
## 
## REML criterion at convergence: 4536
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.8177 -0.5218  0.0942  0.6070  2.2486 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  id       (Intercept) 5.05     2.25    
##  Residual             8.43     2.90    
## Number of obs: 846, groups:  id, 423
## 
## Fixed effects:
##                         Estimate Std. Error t value
## (Intercept)                7.815      0.304   25.70
## as.factor(treatment.a)A    2.506      0.349    7.19
## as.factor(treatment.a)B    1.952      0.348    5.61
## period                     0.301      0.415    0.73
## carry.a                   -0.878      0.526   -1.67
## carry.b                    0.186      0.524    0.36
## 
## Correlation of Fixed Effects:
##             (Intr) a.(.)A a.(.)B period carry.
## as.fctr(.)A -0.716                            
## as.fctr(.)B -0.716  0.565                     
## period       0.143 -0.330 -0.326              
## carry.a     -0.299  0.490  0.164 -0.763       
## carry.b     -0.296  0.163  0.485 -0.761  0.509
#Differences in treatment effects
num<-2.5057-1.9519; num
## [1] 0.554
dem<- sqrt((0.3486)^2 -2* 0.565*(0.3486)*(0.3480)+(0.3480)^2); dem
## [1] 0.325
z<-num/dem; z
## [1] 1.7
p<-2*(1-pnorm(abs(z))); p
## [1] 0.0883
#Differences in carryover effects
num.2<- -0.8775-0.1864; num.2
## [1] -1.06
dem.2<- sqrt((0.5257)^2 -2*0.509*(0.5257)*(0.5237)+(0.5237)^2); dem.2
## [1] 0.52
z.2<-num.2/dem.2; z.2
## [1] -2.05
p.2<-2*(1-pnorm(abs(z.2))); p.2
## [1] 0.0407
#MODEL WITH NO CARRYOVER
no.carryover <- lmer(response~as.factor(treatment.a)+
                       period+(1|id), 
                     data=headache)
summary(no.carryover)
## Linear mixed model fit by REML ['lmerMod']
## Formula: response ~ as.factor(treatment.a) + period + (1 | id)
##    Data: headache
## 
## REML criterion at convergence: 4541
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.8262 -0.5288  0.0989  0.6108  2.1413 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  id       (Intercept) 5.10     2.26    
##  Residual             8.43     2.90    
## Number of obs: 846, groups:  id, 423
## 
## Fixed effects:
##                         Estimate Std. Error t value
## (Intercept)               7.7341     0.2861   27.04
## as.factor(treatment.a)A   2.8410     0.3021    9.40
## as.factor(treatment.a)B   1.8202     0.3027    6.01
## period                    0.0242     0.1996    0.12
## 
## Correlation of Fixed Effects:
##             (Intr) a.(.)A a.(.)B
## as.fctr(.)A -0.721              
## as.fctr(.)B -0.721  0.707       
## period      -0.349 -0.001  0.001
#Differences in treatment effects
num.3<-2.84104-1.82015; num.3
## [1] 1.02
dem.3<- sqrt((0.30209)^2 -2*0.707*(0.30209)*(0.30269)+(0.30269)^2); dem.3
## [1] 0.231
z.3<-num.3/dem.3; z.3
## [1] 4.41
p.3<-2*(1-pnorm(abs(z.3))); p.3
## [1] 0.0000103
###MULTIPLE SOURCES###
#clean up
rm(list=ls())

#load data
ccs<-read.table("ccs.txt", header=TRUE, sep="",na.strings = ".")

#reshape long and then SORT!!!
ccs.long<-melt.data.frame(data=ccs, 
                          measure.vars=c("parentRep","teacherRep"), 
                          id=c("childID", "physical", "singlePar"))
ccs.long<-with(ccs.long,ccs.long[order(childID,variable),])

#relevel informant variable
ccs.long$variable<-relevel(ccs.long$variable, "teacherRep")

#GEE model
ccs.mod<-geepack::geeglm(value~as.factor(variable)*physical+singlePar, 
                id=childID,  
                family=binomial(link="logit"), 
                data=ccs.long, 
                scale.fix=TRUE, 
                corstr="exchangeable")
summary(ccs.mod)
## 
## Call:
## geepack::geeglm(formula = value ~ as.factor(variable) * physical + 
##     singlePar, family = binomial(link = "logit"), data = ccs.long, 
##     id = childID, corstr = "exchangeable", scale.fix = TRUE)
## 
##  Coefficients:
##                                       Estimate Std.err   Wald    Pr(>|W|)
## (Intercept)                            -1.6801  0.0999 282.74     < 2e-16
## as.factor(variable)parentRep           -0.4738  0.1180  16.13 0.000059206
## physical                                0.1423  0.1350   1.11      0.2917
## singlePar                               0.6137  0.1076  32.54 0.000000012
## as.factor(variable)parentRep:physical   0.4573  0.1571   8.47      0.0036
##                                          
## (Intercept)                           ***
## as.factor(variable)parentRep          ***
## physical                                 
## singlePar                             ***
## as.factor(variable)parentRep:physical ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Scale is fixed.
## 
## Correlation: Structure = exchangeable  Link = identity 
## 
## Estimated Correlation Parameters:
##       Estimate Std.err
## alpha    0.268   0.026
## Number of clusters:   2501   Maximum cluster size: 2
###########################################################

0.2.13 Models for Multiple Events

###REPEATED EVENTS COX MODEL###
##MILITARIZED INTERVENTIONS##
#clean up
rm(list=ls())

#load packages
library(foreign)
library(survival)

#load data
#http://psfaculty.ucdavis.edu/bsjjones/icpsr_omi_spellsplit.dta
interventions<-read.dta("icpsr_omi_spellsplit.dta")

#Model: compare to Box-Steffensmeier & Jones Table 10.2.
#Note: frailty terms would be ideal.
conditional.interventions<-clogit(event~pbal+ctg+idem+tdem+strata(RS),
                                  data=interventions)
summary(conditional.interventions)
## Call:
## coxph(formula = Surv(rep(1, 12780L), event) ~ pbal + ctg + idem + 
##     tdem + strata(RS), data = interventions, method = "exact")
## 
##   n= 9182, number of events= 500 
##    (3598 observations deleted due to missingness)
## 
##          coef exp(coef) se(coef)     z Pr(>|z|)    
## pbal -0.52876   0.58933  0.15871 -3.33  0.00086 ***
## ctg  -0.29427   0.74508  0.10779 -2.73  0.00633 ** 
## idem  0.01069   1.01074  0.00646  1.65  0.09794 .  
## tdem  0.01641   1.01655  0.00720  2.28  0.02257 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##      exp(coef) exp(-coef) lower .95 upper .95
## pbal     0.589      1.697     0.432     0.804
## ctg      0.745      1.342     0.603     0.920
## idem     1.011      0.989     0.998     1.024
## tdem     1.017      0.984     1.002     1.031
## 
## Concordance= 0.566  (se = 0.016 )
## Rsquare= 0.003   (max possible= 0.296 )
## Likelihood ratio test= 24.1  on 4 df,   p=0.00008
## Wald test            = 24.6  on 4 df,   p=0.00006
## Score (logrank) test = 24.7  on 4 df,   p=0.00006
###COMPETING RISKS COX MODEL###
##MELANOMA PATIENT SURVIVAL TIME##
#clean up
rm(list=ls())

#load package and data
library(riskRegression)
## Loading required package: data.table
## 
## Attaching package: 'data.table'
## The following object is masked from 'package:plm':
## 
##     between
## The following object is masked from 'package:reshape':
## 
##     melt
## Loading required package: ggplot2
## Loading required package: prodlim
## riskRegression version 2018.10.03
data(Melanoma)
#backup file: Melanoma.csv

#Plan A: Different predictors for each cause.
fit1 <- riskRegression::CSC(list(Hist(time,status)~
                   sex,Hist(time,status)~
                   invasion+epicel+age),
            data=Melanoma)
print(fit1)
## riskRegression::CSC(formula = list(Hist(time, status) ~ sex, 
##     Hist(time, status) ~ invasion + epicel + age), data = Melanoma)
## 
## Right-censored response of a competing.risks model
## 
## No.Observations: 205 
## 
## Pattern:
##          
## Cause     event right.censored
##   1          57              0
##   2          14              0
##   unknown     0            134
## 
## 
## ----------> Cause:  1 
## 
## Call:
## survival::coxph(formula = survival::Surv(time, status) ~ sex, 
##     x = TRUE, y = TRUE)
## 
##   n= 205, number of events= 57 
## 
##          coef exp(coef) se(coef)   z Pr(>|z|)  
## sexMale 0.662     1.939    0.265 2.5    0.013 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##         exp(coef) exp(-coef) lower .95 upper .95
## sexMale      1.94      0.516      1.15      3.26
## 
## Concordance= 0.59  (se = 0.034 )
## Rsquare= 0.03   (max possible= 0.937 )
## Likelihood ratio test= 6.15  on 1 df,   p=0.01
## Wald test            = 6.24  on 1 df,   p=0.01
## Score (logrank) test = 6.47  on 1 df,   p=0.01
## 
## 
## 
## ----------> Cause:  2 
## 
## Call:
## survival::coxph(formula = survival::Surv(time, status) ~ invasion + 
##     epicel + age, x = TRUE, y = TRUE)
## 
##   n= 205, number of events= 14 
## 
##                    coef exp(coef) se(coef)     z Pr(>|z|)    
## invasionlevel.1 -0.9130    0.4013   0.6411 -1.42  0.15438    
## invasionlevel.2 -1.2766    0.2790   1.1170 -1.14  0.25309    
## epicelpresent    0.3224    1.3804   0.5701  0.57  0.57173    
## age              0.0932    1.0977   0.0261  3.58  0.00035 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                 exp(coef) exp(-coef) lower .95 upper .95
## invasionlevel.1     0.401      2.492    0.1142      1.41
## invasionlevel.2     0.279      3.584    0.0312      2.49
## epicelpresent       1.380      0.724    0.4516      4.22
## age                 1.098      0.911    1.0431      1.16
## 
## Concordance= 0.83  (se = 0.046 )
## Rsquare= 0.088   (max possible= 0.481 )
## Likelihood ratio test= 19  on 4 df,   p=0.0008
## Wald test            = 14  on 4 df,   p=0.007
## Score (logrank) test = 15  on 4 df,   p=0.005
#Plan B: Same predictors for each cause.
fit2 <- CSC(Hist(time,status)~
              sex+invasion+epicel+age,
            data=Melanoma)
print(fit2)
## CSC(formula = Hist(time, status) ~ sex + invasion + epicel + 
##     age, data = Melanoma)
## 
## Right-censored response of a competing.risks model
## 
## No.Observations: 205 
## 
## Pattern:
##          
## Cause     event right.censored
##   1          57              0
##   2          14              0
##   unknown     0            134
## 
## 
## ----------> Cause:  1 
## 
## Call:
## survival::coxph(formula = survival::Surv(time, status) ~ sex + 
##     invasion + epicel + age, x = TRUE, y = TRUE)
## 
##   n= 205, number of events= 57 
## 
##                     coef exp(coef) se(coef)     z Pr(>|z|)    
## sexMale          0.81455   2.25817  0.27080  3.01  0.00263 ** 
## invasionlevel.1  0.95490   2.59842  0.32593  2.93  0.00339 ** 
## invasionlevel.2  1.37028   3.93647  0.38333  3.57  0.00035 ***
## epicelpresent   -0.96361   0.38151  0.30587 -3.15  0.00163 ** 
## age              0.01624   1.01637  0.00844  1.92  0.05439 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                 exp(coef) exp(-coef) lower .95 upper .95
## sexMale             2.258      0.443     1.328     3.839
## invasionlevel.1     2.598      0.385     1.372     4.922
## invasionlevel.2     3.936      0.254     1.857     8.344
## epicelpresent       0.382      2.621     0.209     0.695
## age                 1.016      0.984     1.000     1.033
## 
## Concordance= 0.731  (se = 0.036 )
## Rsquare= 0.168   (max possible= 0.937 )
## Likelihood ratio test= 37.6  on 5 df,   p=0.0000005
## Wald test            = 33.7  on 5 df,   p=0.000003
## Score (logrank) test = 35.9  on 5 df,   p=0.000001
## 
## 
## 
## ----------> Cause:  2 
## 
## Call:
## survival::coxph(formula = survival::Surv(time, status) ~ sex + 
##     invasion + epicel + age, x = TRUE, y = TRUE)
## 
##   n= 205, number of events= 14 
## 
##                    coef exp(coef) se(coef)     z Pr(>|z|)    
## sexMale          0.2498    1.2838   0.5560  0.45   0.6532    
## invasionlevel.1 -0.8908    0.4103   0.6429 -1.39   0.1659    
## invasionlevel.2 -1.2675    0.2815   1.1246 -1.13   0.2597    
## epicelpresent    0.2762    1.3181   0.5806  0.48   0.6343    
## age              0.0914    1.0957   0.0262  3.48   0.0005 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                 exp(coef) exp(-coef) lower .95 upper .95
## sexMale             1.284      0.779    0.4317      3.82
## invasionlevel.1     0.410      2.437    0.1164      1.45
## invasionlevel.2     0.282      3.552    0.0311      2.55
## epicelpresent       1.318      0.759    0.4224      4.11
## age                 1.096      0.913    1.0408      1.15
## 
## Concordance= 0.837  (se = 0.047 )
## Rsquare= 0.089   (max possible= 0.481 )
## Likelihood ratio test= 19.2  on 5 df,   p=0.002
## Wald test            = 14.4  on 5 df,   p=0.01
## Score (logrank) test = 15.8  on 5 df,   p=0.008
###MULTINOMIAL LOGIT COMPETING RISKS MODEL###
##MEANS OF CONGRESSIONAL EXIT##
#clean up
rm(list=ls())

#load package
library(nnet)
## 
## Attaching package: 'nnet'
## The following object is masked from 'package:mgcv':
## 
##     multinom
library(foreign)

#Load Data
cong<-read.dta('career.dta')

#model: compare to Box-Steffensmeier & Jones Table 10.6
#Note: frailty terms would be ideal. May need "MCMCglmm"
four.exits<-multinom(event~rep+redist+scandal+opengov+
                       opensen+leader+age+priorm+log(duration),
                     data=cong)
## # weights:  55 (40 variable)
## initial  value 8737.638427 
## iter  10 value 4831.129941
## iter  20 value 3948.051135
## iter  30 value 3296.132728
## iter  40 value 3077.559021
## iter  50 value 3065.565411
## final  value 3064.504945 
## converged
summary(four.exits)
## Call:
## multinom(formula = event ~ rep + redist + scandal + opengov + 
##     opensen + leader + age + priorm + log(duration), data = cong)
## 
## Coefficients:
##   (Intercept)     rep redist scandal opengov opensen  leader     age
## 1       -3.05 -0.1259   1.61    2.76  0.0820 -0.2812  -0.591  0.0387
## 2       -6.23 -0.0064   1.42    3.29  0.2325 -0.4574 -25.165  0.0434
## 3       -8.02  0.1864   1.36    1.23  0.0368  0.0943  -0.381  0.0820
## 4       -1.18  0.2823   1.52  -13.27  0.5099  1.0293  -1.557 -0.0591
##       priorm log(duration)
## 1 -0.0571540        -0.314
## 2 -0.0048450        -0.167
## 3 -0.0100659         0.515
## 4 -0.0000927         0.491
## 
## Std. Errors:
##   (Intercept)   rep redist      scandal opengov opensen          leader
## 1       0.357 0.125  0.291 0.3810760708   0.157   0.206 0.5237159581173
## 2       0.752 0.259  0.553 0.4326457972   0.307   0.434 0.0000000000035
## 3       0.461 0.143  0.327 0.4171685839   0.173   0.203 0.3068835106453
## 4       0.425 0.144  0.323 0.0000000935   0.156   0.157 1.0166972257154
##       age  priorm log(duration)
## 1 0.00733 0.00500        0.0925
## 2 0.01516 0.00494        0.1912
## 3 0.00861 0.00286        0.1178
## 4 0.00985 0.00269        0.1220
## 
## Residual Deviance: 6129 
## AIC: 6209
###########################################################

0.2.14 Multilevel Models

###CHAPTER 8: USING PACKAGES TO APPLY ADVANCED MODELS###

##REQUIRED DATA FILES: BPchap7.dta, SinghJTP.dta, LL.csv, and UN.csv

##SECTION 8.1: MULTILEVEL MODELS WITH lme4##
#clean up
rm(list=ls())

#load package
library(foreign)

#load and clean data
#evolution<-read.dta("http://j.mp/BPchap7")
evolution<-read.dta("BPchap7.dta")
evolution$female[evolution$female==9]<-NA
evolution<-subset(evolution,!is.na(female))

#load package
#install.packages("lme4")
library(lme4)

#estimate linear model of hours spent teaching evolution 
#with random effects by state
hours.ml<-lmer(hrs_allev~phase1+senior_c+ph_senior+
                 notest_p+ph_notest_p+female+biocred3+
                 degr3+evol_course+certified+idsci_trans+
                 confident+(1|st_fip),
               data=evolution)
summary(hours.ml)
## Linear mixed model fit by REML ['lmerMod']
## Formula: 
## hrs_allev ~ phase1 + senior_c + ph_senior + notest_p + ph_notest_p +  
##     female + biocred3 + degr3 + evol_course + certified + idsci_trans +  
##     confident + (1 | st_fip)
##    Data: evolution
## 
## REML criterion at convergence: 5940
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -2.348 -0.714 -0.175  0.557  3.885 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  st_fip   (Intercept)  3.09    1.76    
##  Residual             67.87    8.24    
## Number of obs: 841, groups:  st_fip, 49
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   10.568      1.214    8.71
## phase1         0.758      0.443    1.71
## senior_c      -0.529      0.310   -1.71
## ph_senior     -0.527      0.270   -1.95
## notest_p       0.113      0.749    0.15
## ph_notest_p   -0.527      0.660   -0.80
## female        -0.970      0.603   -1.61
## biocred3       0.516      0.504    1.02
## degr3         -0.443      0.389   -1.14
## evol_course    2.389      0.627    3.81
## certified     -0.533      0.719   -0.74
## idsci_trans    1.728      1.116    1.55
## confident      2.674      0.447    5.98
## 
## Correlation matrix not shown by default, as p = 13 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it
#SECTION 8.1.2: MULTILEVEL LOGISTIC REGRESSION#
#load packages
library(lme4)
library(foreign)

#load data
#voting<-read.dta("http://j.mp/SINGHjtp")
voting<-read.dta("SinghJTP.dta")

#estimate logistic regression model of voting for incumbents 
#with random effects by country-year
inc.linear.ml<-glmer(votedinc~distanceinc+(1|cntryyear),
                     family=binomial(link="logit"),
                     data=voting)
summary(inc.linear.ml)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: votedinc ~ distanceinc + (1 | cntryyear)
##    Data: voting
## 
##      AIC      BIC   logLik deviance df.resid 
##    41999    42025   -20996    41993    38208 
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -1.541 -0.680 -0.428  0.955 14.835 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  cntryyear (Intercept) 0.207    0.455   
## Number of obs: 38211, groups:  cntryyear, 30
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  0.16175    0.08545    1.89    0.058 .  
## distanceinc -0.50122    0.00888  -56.47   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## distanceinc -0.184
#estimate logistic regression model of voting for incumbents 
#with random intercepts and random coefficients on ideological 
#distance by country-year
inc.linear.ml.2<-glmer(votedinc~distanceinc+(distanceinc|cntryyear),
                       family=binomial(link="logit"),
                       data=voting)
summary(inc.linear.ml.2)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: votedinc ~ distanceinc + (distanceinc | cntryyear)
##    Data: voting
## 
##      AIC      BIC   logLik deviance df.resid 
##    41074    41117   -20532    41064    38206 
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -3.067 -0.701 -0.415  0.920 28.018 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev. Corr 
##  cntryyear (Intercept) 0.6167   0.785         
##            distanceinc 0.0981   0.313    -0.81
## Number of obs: 38211, groups:  cntryyear, 30
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   0.2620     0.1450    1.81    0.071 .  
## distanceinc  -0.5305     0.0581   -9.13   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## distanceinc -0.807
###THREE-LEVEL EXAMPLE FROM FITZMAURICE, LAIRD AND WARE TABLE 22.3###
#clean up
rm(list=ls())

#required libraries
library(lme4)

#load data
smoking<-read.table("tvsfp.txt", header=TRUE)

#Three-Level Model (students within classes within schools)
smoking.mod<-lmer(post~pre+schT+tvT+schT*tvT+
                    (1|schoolID)+(1|classID), 
                  data=smoking)
summary(smoking.mod)
## Linear mixed model fit by REML ['lmerMod']
## Formula: post ~ pre + schT + tvT + schT * tvT + (1 | schoolID) + (1 |  
##     classID)
##    Data: smoking
## 
## REML criterion at convergence: 5373
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.4987 -0.6976 -0.0172  0.6824  3.1460 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  classID  (Intercept) 0.0647   0.254   
##  schoolID (Intercept) 0.0386   0.197   
##  Residual             1.6023   1.266   
## Number of obs: 1600, groups:  classID, 135; schoolID, 28
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   1.7020     0.1254   13.57
## pre           0.3054     0.0259   11.79
## schT          0.6413     0.1609    3.98
## tvT           0.1821     0.1572    1.16
## schT:tvT     -0.3309     0.2246   -1.47
## 
## Correlation of Fixed Effects:
##          (Intr) pre    schT   tvT   
## pre      -0.442                     
## schT     -0.634  0.015              
## tvT      -0.645  0.008  0.501       
## schT:tvT  0.448  0.005 -0.716 -0.700
###Another neat dataset from Gelman & Hill. CBS 1988 exit poll data.###
#Logit Model of Bush Support in 1988 as a function of race and sex, 
#with a random effect for state
#elec.88<-read.table("http://www.stat.columbia.edu/~gelman/arm/examples/election88/polls.subset.dat", header=TRUE)

###########################################################

0.2.15 Sample Size and Power

#load package
library(pwr)

###Difference of means test? 
###What sample for an effect size of 1? 
###Number of observations per group.###
#Note: Effect size as defined by Cohen: 
#absolute difference divided by standard deviation
pwr.t.test(d=1, sig.level=.05, power=.8, 
           type="two.sample", alternative="two.sided")
## 
##      Two-sample t test power calculation 
## 
##               n = 16.7
##               d = 1
##       sig.level = 0.05
##           power = 0.8
##     alternative = two.sided
## 
## NOTE: n is number in *each* group
#Hand calculation if you were content with a normal distribution. 
#Number of observations per group.
(qnorm(.975)+qnorm(.8))^2/(.5*(1-.5))/2
## [1] 15.7
#Hand calculation if you wanted to hunt-and-pack t-distribution 
#for degrees of freedom. Number of observations per group.
(qt(.975,df=32)+qt(.8,df=32))^2/(.5*(1-.5))/2
## [1] 16.7
###Two samples of different sizes. Pick one group, 
###find the size for the other group.###
pwr.t2n.test(d=0.6,n1=90,n2=NULL,alternative="greater",power=.8)
## 
##      t test power calculation 
## 
##              n1 = 90
##              n2 = 21.6
##               d = 0.6
##       sig.level = 0.05
##           power = 0.8
##     alternative = greater
pwr.t2n.test(d=0.6,n1=NULL,n2=60,alternative="greater",power=.8)
## 
##      t test power calculation 
## 
##              n1 = 24.6
##              n2 = 60
##               d = 0.6
##       sig.level = 0.05
##           power = 0.8
##     alternative = greater
###ANOVA: Can we explain 2% of variance with 1 predictor?
###Number of observations overall.###
pwr.f2.test(u=1, f2=.02/.98, sig.level=.05, power=.8)
## 
##      Multiple regression power calculation 
## 
##               u = 1
##               v = 385
##              f2 = 0.0204
##       sig.level = 0.05
##           power = 0.8
#Can we detect joint significance of three predictors if 
#they can explain an extra 10 percentage poitns of variance 
#if the model explains 60% total?
pwr.f2.test(u=3, f2=.10/.40, sig.level=.05, power=.8)
## 
##      Multiple regression power calculation 
## 
##               u = 3
##               v = 43.7
##              f2 = 0.25
##       sig.level = 0.05
##           power = 0.8
###Panel Example: Section 20.3.4 in book. Number of observations overall.###
#hard coded version
sigma.beta.2<-2.8 #variance of coefficient of interest
delta<-1.2 #minimum treatment effect
(((qnorm(.975)+qnorm(.9))^2)*sigma.beta.2)/(.5*(1-.5)*delta^2) #result
## [1] 81.7
#Equation 20.2 as a Function#
#Note that you may want to consider Euqation 20.4 to get a sense of sigma.beta.2.
eqn.20.2<-function(alpha,power,delta,sigma.beta.2,split){
    sample.size<-(((qnorm(1-(alpha/2))+qnorm(power))^2)*sigma.beta.2)/
      (split*(1-split)*delta^2) 
    return(sample.size)
}   

#replicate
eqn.20.2(.05,.9,1.2,2.8,.5)
## [1] 81.7
#content with 80% power
eqn.20.2(alpha=.05,power=.8,delta=1.2,sigma.beta.2=2.8,split=.5)
## [1] 61